sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    return lambda self, this: self._parse_escape(
        self.expression(expr_type, this=this, expression=self._parse_bitwise())
    )


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)
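
# For instance, the MOD builder above parenthesizes binary operands so that
# operator precedence survives transpilation. A sketch of the observable
# behavior (assuming the default dialect; output shown for illustration):
#
#     >>> import sqlglot
#     >>> sqlglot.parse_one("SELECT MOD(a + 1, 7)").sql()
#     'SELECT (a + 1) % 7'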

class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "MOD": build_mod,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": build_var_map,
        "LOWER": build_lower,
        "UPPER": build_upper,
        "HEX": build_hex,
        "TO_HEX": build_hex,
    }
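
    # Dialect parsers usually extend this table instead of replacing it. A
    # hypothetical sketch (MyParser and the FOO function are illustrative only,
    # not part of sqlglot):
    #
    #     class MyParser(Parser):
    #         FUNCTIONS = {
    #             **Parser.FUNCTIONS,
    #             "FOO": lambda args: exp.Anonymous(this="FOO", expressions=args),
    #         }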

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.MODEL,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}
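
    # The tables above drive a conventional precedence-climbing descent: each
    # level (conjunction, equality, comparison, bitwise, term, factor, exponent)
    # parses the next-tighter level as its operands, so "a + b * c" comes out
    # as Add(this=a, expression=Mul(this=b, expression=c)).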

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        ),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
797 "DISTKEY": lambda self: self._parse_distkey(), 798 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 799 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 800 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 801 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 802 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 803 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 804 "FREESPACE": lambda self: self._parse_freespace(), 805 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 806 "HEAP": lambda self: self.expression(exp.HeapProperty), 807 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 808 "IMMUTABLE": lambda self: self.expression( 809 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 810 ), 811 "INHERITS": lambda self: self.expression( 812 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 813 ), 814 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 815 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 816 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 817 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 818 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 819 "LIKE": lambda self: self._parse_create_like(), 820 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 821 "LOCK": lambda self: self._parse_locking(), 822 "LOCKING": lambda self: self._parse_locking(), 823 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 824 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 825 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 826 "MODIFIES": lambda self: self._parse_modifies_property(), 827 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 828 "NO": lambda self: self._parse_no_property(), 829 "ON": lambda self: self._parse_on_property(), 830 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 831 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 832 "PARTITION": lambda self: self._parse_partitioned_of(), 833 "PARTITION BY": lambda self: self._parse_partitioned_by(), 834 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 835 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 836 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 837 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 838 "READS": lambda self: self._parse_reads_property(), 839 "REMOTE": lambda self: self._parse_remote_with_connection(), 840 "RETURNS": lambda self: self._parse_returns(), 841 "ROW": lambda self: self._parse_row(), 842 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 843 "SAMPLE": lambda self: self.expression( 844 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 845 ), 846 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 847 "SETTINGS": lambda self: self.expression( 848 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 849 ), 850 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 851 "SORTKEY": lambda self: self._parse_sortkey(), 852 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 853 "STABLE": lambda self: self.expression( 854 

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_conjunction),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }
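
    # Each entry returns an (arg_name, expression) pair, and the query-modifier
    # loop keeps probing this table after the main query is parsed, so clause
    # order stays flexible. E.g. for "SELECT a FROM t LIMIT 1", the exp.Limit
    # node should end up stored under the Select node's "limit" arg.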

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTER: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple())

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. SELECT COUNT(*) 'count'
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_UNION = True
    UNION_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
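
    # Dialects typically tweak parsing behavior by flipping the class-level
    # flags above rather than overriding methods. A hypothetical sketch
    # (MyParser is illustrative, not part of sqlglot):
    #
    #     class MyParser(Parser):
    #         LOG_DEFAULTS_TO_LN = True  # single-argument LOG(x) parses as LN(x)
    #         STRING_ALIASES = True      # allow SELECT COUNT(*) 'count'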

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
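
    # Typical end-to-end usage, as a sketch (sqlglot.parse wraps these steps):
    #
    #     >>> from sqlglot.dialects.dialect import Dialect
    #     >>> dialect = Dialect.get_or_raise("duckdb")
    #     >>> sql = "SELECT 1"
    #     >>> dialect.parser().parse(dialect.tokenize(sql), sql)
    #     [Select(expressions=[...])]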

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if token.comments:
                    chunks.append([token])

                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions
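
    # parse_into dispatches through EXPRESSION_PARSERS, which is handy for
    # parsing SQL fragments. A sketch, reusing the dialect object from the
    # earlier example (reprs abbreviated):
    #
    #     >>> tokens = dialect.tokenize("x > 1")
    #     >>> dialect.parser().parse_into(exp.Condition, tokens, "x > 1")
    #     [GT(this=Column(...), expression=Literal(...))]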
1361 """ 1362 instance = exp_class(**kwargs) 1363 instance.add_comments(comments) if comments else self._add_comments(instance) 1364 return self.validate_expression(instance) 1365 1366 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1367 if expression and self._prev_comments: 1368 expression.add_comments(self._prev_comments) 1369 self._prev_comments = None 1370 1371 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1372 """ 1373 Validates an Expression, making sure that all its mandatory arguments are set. 1374 1375 Args: 1376 expression: The expression to validate. 1377 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1378 1379 Returns: 1380 The validated expression. 1381 """ 1382 if self.error_level != ErrorLevel.IGNORE: 1383 for error_message in expression.error_messages(args): 1384 self.raise_error(error_message) 1385 1386 return expression 1387 1388 def _find_sql(self, start: Token, end: Token) -> str: 1389 return self.sql[start.start : end.end + 1] 1390 1391 def _is_connected(self) -> bool: 1392 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1393 1394 def _advance(self, times: int = 1) -> None: 1395 self._index += times 1396 self._curr = seq_get(self._tokens, self._index) 1397 self._next = seq_get(self._tokens, self._index + 1) 1398 1399 if self._index > 0: 1400 self._prev = self._tokens[self._index - 1] 1401 self._prev_comments = self._prev.comments 1402 else: 1403 self._prev = None 1404 self._prev_comments = None 1405 1406 def _retreat(self, index: int) -> None: 1407 if index != self._index: 1408 self._advance(index - self._index) 1409 1410 def _warn_unsupported(self) -> None: 1411 if len(self._tokens) <= 1: 1412 return 1413 1414 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1415 # interested in emitting a warning for the one being currently processed. 1416 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1417 1418 logger.warning( 1419 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1420 ) 1421 1422 def _parse_command(self) -> exp.Command: 1423 self._warn_unsupported() 1424 return self.expression( 1425 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1426 ) 1427 1428 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1429 """ 1430 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command, this=self._prev.text.upper(), expression=self._parse_string()
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this
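
    # Backtracking sketch: _try_parse records self._index, forces
    # ErrorLevel.IMMEDIATE so a failed sub-parse surfaces as ParseError, and
    # rewinds the token cursor via _retreat before restoring the caller's
    # error level, e.g.
    #
    #     dtype = self._try_parse(self._parse_types)  # None, with cursor rewound, on failure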

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=kind.upper(),
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )

        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_statement()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

            shallow = self._match_text_seq("SHALLOW")

            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self.expression(
                    exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
                )

        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            return self._parse_as_command(start)

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text.upper(),
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )
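
    # For example, "CREATE OR REPLACE TEMPORARY VIEW v AS SELECT 1" should come
    # out of this method as exp.Create(kind="VIEW", replace=True, ...), with the
    # TemporaryProperty collected into `properties` (POST_CREATE, since TEMPORARY
    # precedes the VIEW keyword) and the SELECT stored as `expression`.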
self._match_text_seq("CACHE"): 1754 # T-SQL allows empty CACHE which is initialized dynamically 1755 seq.set("cache", self._parse_number() or True) 1756 elif self._match_text_seq("OWNED", "BY"): 1757 # "OWNED BY NONE" is the default 1758 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1759 else: 1760 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1761 if opt: 1762 options.append(opt) 1763 else: 1764 break 1765 1766 seq.set("options", options if options else None) 1767 return None if self._index == index else seq 1768 1769 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1770 # only used for teradata currently 1771 self._match(TokenType.COMMA) 1772 1773 kwargs = { 1774 "no": self._match_text_seq("NO"), 1775 "dual": self._match_text_seq("DUAL"), 1776 "before": self._match_text_seq("BEFORE"), 1777 "default": self._match_text_seq("DEFAULT"), 1778 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1779 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1780 "after": self._match_text_seq("AFTER"), 1781 "minimum": self._match_texts(("MIN", "MINIMUM")), 1782 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1783 } 1784 1785 if self._match_texts(self.PROPERTY_PARSERS): 1786 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1787 try: 1788 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1789 except TypeError: 1790 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1791 1792 return None 1793 1794 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1795 return self._parse_wrapped_csv(self._parse_property) 1796 1797 def _parse_property(self) -> t.Optional[exp.Expression]: 1798 if self._match_texts(self.PROPERTY_PARSERS): 1799 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1800 1801 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1802 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1803 1804 if self._match_text_seq("COMPOUND", "SORTKEY"): 1805 return self._parse_sortkey(compound=True) 1806 1807 if self._match_text_seq("SQL", "SECURITY"): 1808 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1809 1810 index = self._index 1811 key = self._parse_column() 1812 1813 if not self._match(TokenType.EQ): 1814 self._retreat(index) 1815 return self._parse_sequence_properties() 1816 1817 return self.expression( 1818 exp.Property, 1819 this=key.to_dot() if isinstance(key, exp.Column) else key, 1820 value=self._parse_bitwise() or self._parse_var(any_token=True), 1821 ) 1822 1823 def _parse_stored(self) -> exp.FileFormatProperty: 1824 self._match(TokenType.ALIAS) 1825 1826 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1827 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1828 1829 return self.expression( 1830 exp.FileFormatProperty, 1831 this=( 1832 self.expression( 1833 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1834 ) 1835 if input_format or output_format 1836 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1837 ), 1838 ) 1839 1840 def _parse_unquoted_field(self): 1841 field = self._parse_field() 1842 if isinstance(field, exp.Identifier) and not field.quoted: 1843 field = exp.var(field) 1844 1845 return field 1846 1847 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1848 self._match(TokenType.EQ) 1849 
self._match(TokenType.ALIAS) 1850 1851 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 1852 1853 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1854 properties = [] 1855 while True: 1856 if before: 1857 prop = self._parse_property_before() 1858 else: 1859 prop = self._parse_property() 1860 if not prop: 1861 break 1862 for p in ensure_list(prop): 1863 properties.append(p) 1864 1865 if properties: 1866 return self.expression(exp.Properties, expressions=properties) 1867 1868 return None 1869 1870 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1871 return self.expression( 1872 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1873 ) 1874 1875 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1876 if self._index >= 2: 1877 pre_volatile_token = self._tokens[self._index - 2] 1878 else: 1879 pre_volatile_token = None 1880 1881 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1882 return exp.VolatileProperty() 1883 1884 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1885 1886 def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty: 1887 self._match_pair(TokenType.EQ, TokenType.ON) 1888 1889 prop = self.expression(exp.WithSystemVersioningProperty) 1890 if self._match(TokenType.L_PAREN): 1891 self._match_text_seq("HISTORY_TABLE", "=") 1892 prop.set("this", self._parse_table_parts()) 1893 1894 if self._match(TokenType.COMMA): 1895 self._match_text_seq("DATA_CONSISTENCY_CHECK", "=") 1896 prop.set("expression", self._advance_any() and self._prev.text.upper()) 1897 1898 self._match_r_paren() 1899 1900 return prop 1901 1902 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1903 if self._match(TokenType.L_PAREN, advance=False): 1904 return self._parse_wrapped_properties() 1905 1906 if self._match_text_seq("JOURNAL"): 1907 return self._parse_withjournaltable() 1908 1909 if self._match_texts(self.VIEW_ATTRIBUTES): 1910 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 1911 1912 if self._match_text_seq("DATA"): 1913 return self._parse_withdata(no=False) 1914 elif self._match_text_seq("NO", "DATA"): 1915 return self._parse_withdata(no=True) 1916 1917 if not self._next: 1918 return None 1919 1920 return self._parse_withisolatedloading() 1921 1922 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1923 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1924 self._match(TokenType.EQ) 1925 1926 user = self._parse_id_var() 1927 self._match(TokenType.PARAMETER) 1928 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1929 1930 if not user or not host: 1931 return None 1932 1933 return exp.DefinerProperty(this=f"{user}@{host}") 1934 1935 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1936 self._match(TokenType.TABLE) 1937 self._match(TokenType.EQ) 1938 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1939 1940 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1941 return self.expression(exp.LogProperty, no=no) 1942 1943 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1944 return self.expression(exp.JournalProperty, **kwargs) 1945 1946 def _parse_checksum(self) -> exp.ChecksumProperty: 1947 self._match(TokenType.EQ) 1948 1949 on = None 1950 if self._match(TokenType.ON): 1951 on = True 
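        # CHECKSUM = ON sets on=True and CHECKSUM = OFF (below) sets on=False;
        # for a bare CHECKSUM = DEFAULT, on stays None and the trailing DEFAULT
        # keyword is captured by the `default` flag on the returned node.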
1952 elif self._match_text_seq("OFF"): 1953 on = False 1954 1955 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1956 1957 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 1958 return self.expression( 1959 exp.Cluster, 1960 expressions=( 1961 self._parse_wrapped_csv(self._parse_ordered) 1962 if wrapped 1963 else self._parse_csv(self._parse_ordered) 1964 ), 1965 ) 1966 1967 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1968 self._match_text_seq("BY") 1969 1970 self._match_l_paren() 1971 expressions = self._parse_csv(self._parse_column) 1972 self._match_r_paren() 1973 1974 if self._match_text_seq("SORTED", "BY"): 1975 self._match_l_paren() 1976 sorted_by = self._parse_csv(self._parse_ordered) 1977 self._match_r_paren() 1978 else: 1979 sorted_by = None 1980 1981 self._match(TokenType.INTO) 1982 buckets = self._parse_number() 1983 self._match_text_seq("BUCKETS") 1984 1985 return self.expression( 1986 exp.ClusteredByProperty, 1987 expressions=expressions, 1988 sorted_by=sorted_by, 1989 buckets=buckets, 1990 ) 1991 1992 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1993 if not self._match_text_seq("GRANTS"): 1994 self._retreat(self._index - 1) 1995 return None 1996 1997 return self.expression(exp.CopyGrantsProperty) 1998 1999 def _parse_freespace(self) -> exp.FreespaceProperty: 2000 self._match(TokenType.EQ) 2001 return self.expression( 2002 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2003 ) 2004 2005 def _parse_mergeblockratio( 2006 self, no: bool = False, default: bool = False 2007 ) -> exp.MergeBlockRatioProperty: 2008 if self._match(TokenType.EQ): 2009 return self.expression( 2010 exp.MergeBlockRatioProperty, 2011 this=self._parse_number(), 2012 percent=self._match(TokenType.PERCENT), 2013 ) 2014 2015 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2016 2017 def _parse_datablocksize( 2018 self, 2019 default: t.Optional[bool] = None, 2020 minimum: t.Optional[bool] = None, 2021 maximum: t.Optional[bool] = None, 2022 ) -> exp.DataBlocksizeProperty: 2023 self._match(TokenType.EQ) 2024 size = self._parse_number() 2025 2026 units = None 2027 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2028 units = self._prev.text 2029 2030 return self.expression( 2031 exp.DataBlocksizeProperty, 2032 size=size, 2033 units=units, 2034 default=default, 2035 minimum=minimum, 2036 maximum=maximum, 2037 ) 2038 2039 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2040 self._match(TokenType.EQ) 2041 always = self._match_text_seq("ALWAYS") 2042 manual = self._match_text_seq("MANUAL") 2043 never = self._match_text_seq("NEVER") 2044 default = self._match_text_seq("DEFAULT") 2045 2046 autotemp = None 2047 if self._match_text_seq("AUTOTEMP"): 2048 autotemp = self._parse_schema() 2049 2050 return self.expression( 2051 exp.BlockCompressionProperty, 2052 always=always, 2053 manual=manual, 2054 never=never, 2055 default=default, 2056 autotemp=autotemp, 2057 ) 2058 2059 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2060 index = self._index 2061 no = self._match_text_seq("NO") 2062 concurrent = self._match_text_seq("CONCURRENT") 2063 2064 if not self._match_text_seq("ISOLATED", "LOADING"): 2065 self._retreat(index) 2066 return None 2067 2068 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2069 return self.expression( 2070 exp.IsolatedLoadingProperty, no=no, 
concurrent=concurrent, target=target 2071 ) 2072 2073 def _parse_locking(self) -> exp.LockingProperty: 2074 if self._match(TokenType.TABLE): 2075 kind = "TABLE" 2076 elif self._match(TokenType.VIEW): 2077 kind = "VIEW" 2078 elif self._match(TokenType.ROW): 2079 kind = "ROW" 2080 elif self._match_text_seq("DATABASE"): 2081 kind = "DATABASE" 2082 else: 2083 kind = None 2084 2085 if kind in ("DATABASE", "TABLE", "VIEW"): 2086 this = self._parse_table_parts() 2087 else: 2088 this = None 2089 2090 if self._match(TokenType.FOR): 2091 for_or_in = "FOR" 2092 elif self._match(TokenType.IN): 2093 for_or_in = "IN" 2094 else: 2095 for_or_in = None 2096 2097 if self._match_text_seq("ACCESS"): 2098 lock_type = "ACCESS" 2099 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2100 lock_type = "EXCLUSIVE" 2101 elif self._match_text_seq("SHARE"): 2102 lock_type = "SHARE" 2103 elif self._match_text_seq("READ"): 2104 lock_type = "READ" 2105 elif self._match_text_seq("WRITE"): 2106 lock_type = "WRITE" 2107 elif self._match_text_seq("CHECKSUM"): 2108 lock_type = "CHECKSUM" 2109 else: 2110 lock_type = None 2111 2112 override = self._match_text_seq("OVERRIDE") 2113 2114 return self.expression( 2115 exp.LockingProperty, 2116 this=this, 2117 kind=kind, 2118 for_or_in=for_or_in, 2119 lock_type=lock_type, 2120 override=override, 2121 ) 2122 2123 def _parse_partition_by(self) -> t.List[exp.Expression]: 2124 if self._match(TokenType.PARTITION_BY): 2125 return self._parse_csv(self._parse_conjunction) 2126 return [] 2127 2128 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2129 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2130 if self._match_text_seq("MINVALUE"): 2131 return exp.var("MINVALUE") 2132 if self._match_text_seq("MAXVALUE"): 2133 return exp.var("MAXVALUE") 2134 return self._parse_bitwise() 2135 2136 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2137 expression = None 2138 from_expressions = None 2139 to_expressions = None 2140 2141 if self._match(TokenType.IN): 2142 this = self._parse_wrapped_csv(self._parse_bitwise) 2143 elif self._match(TokenType.FROM): 2144 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2145 self._match_text_seq("TO") 2146 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2147 elif self._match_text_seq("WITH", "(", "MODULUS"): 2148 this = self._parse_number() 2149 self._match_text_seq(",", "REMAINDER") 2150 expression = self._parse_number() 2151 self._match_r_paren() 2152 else: 2153 self.raise_error("Failed to parse partition bound spec.") 2154 2155 return self.expression( 2156 exp.PartitionBoundSpec, 2157 this=this, 2158 expression=expression, 2159 from_expressions=from_expressions, 2160 to_expressions=to_expressions, 2161 ) 2162 2163 # https://www.postgresql.org/docs/current/sql-createtable.html 2164 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2165 if not self._match_text_seq("OF"): 2166 self._retreat(self._index - 1) 2167 return None 2168 2169 this = self._parse_table(schema=True) 2170 2171 if self._match(TokenType.DEFAULT): 2172 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2173 elif self._match_text_seq("FOR", "VALUES"): 2174 expression = self._parse_partition_bound_spec() 2175 else: 2176 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2177 2178 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2179 2180 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2181 
self._match(TokenType.EQ) 2182 return self.expression( 2183 exp.PartitionedByProperty, 2184 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2185 ) 2186 2187 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2188 if self._match_text_seq("AND", "STATISTICS"): 2189 statistics = True 2190 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2191 statistics = False 2192 else: 2193 statistics = None 2194 2195 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2196 2197 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2198 if self._match_text_seq("SQL"): 2199 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2200 return None 2201 2202 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2203 if self._match_text_seq("SQL", "DATA"): 2204 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2205 return None 2206 2207 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2208 if self._match_text_seq("PRIMARY", "INDEX"): 2209 return exp.NoPrimaryIndexProperty() 2210 if self._match_text_seq("SQL"): 2211 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2212 return None 2213 2214 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2215 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2216 return exp.OnCommitProperty() 2217 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2218 return exp.OnCommitProperty(delete=True) 2219 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2220 2221 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2222 if self._match_text_seq("SQL", "DATA"): 2223 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2224 return None 2225 2226 def _parse_distkey(self) -> exp.DistKeyProperty: 2227 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2228 2229 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2230 table = self._parse_table(schema=True) 2231 2232 options = [] 2233 while self._match_texts(("INCLUDING", "EXCLUDING")): 2234 this = self._prev.text.upper() 2235 2236 id_var = self._parse_id_var() 2237 if not id_var: 2238 return None 2239 2240 options.append( 2241 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2242 ) 2243 2244 return self.expression(exp.LikeProperty, this=table, expressions=options) 2245 2246 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2247 return self.expression( 2248 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2249 ) 2250 2251 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2252 self._match(TokenType.EQ) 2253 return self.expression( 2254 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2255 ) 2256 2257 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2258 self._match_text_seq("WITH", "CONNECTION") 2259 return self.expression( 2260 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2261 ) 2262 2263 def _parse_returns(self) -> exp.ReturnsProperty: 2264 value: t.Optional[exp.Expression] 2265 is_table = self._match(TokenType.TABLE) 2266 2267 if is_table: 2268 if self._match(TokenType.LT): 2269 value = self.expression( 2270 exp.Schema, 2271 this="TABLE", 2272 expressions=self._parse_csv(self._parse_struct_types), 2273 ) 2274 if not 
self._match(TokenType.GT): 2275 self.raise_error("Expecting >") 2276 else: 2277 value = self._parse_schema(exp.var("TABLE")) 2278 else: 2279 value = self._parse_types() 2280 2281 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 2282 2283 def _parse_describe(self) -> exp.Describe: 2284 kind = self._match_set(self.CREATABLES) and self._prev.text 2285 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2286 if self._match(TokenType.DOT): 2287 style = None 2288 self._retreat(self._index - 2) 2289 this = self._parse_table(schema=True) 2290 properties = self._parse_properties() 2291 expressions = properties.expressions if properties else None 2292 return self.expression( 2293 exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2294 ) 2295 2296 def _parse_insert(self) -> exp.Insert: 2297 comments = ensure_list(self._prev_comments) 2298 hint = self._parse_hint() 2299 overwrite = self._match(TokenType.OVERWRITE) 2300 ignore = self._match(TokenType.IGNORE) 2301 local = self._match_text_seq("LOCAL") 2302 alternative = None 2303 is_function = None 2304 2305 if self._match_text_seq("DIRECTORY"): 2306 this: t.Optional[exp.Expression] = self.expression( 2307 exp.Directory, 2308 this=self._parse_var_or_string(), 2309 local=local, 2310 row_format=self._parse_row_format(match_row=True), 2311 ) 2312 else: 2313 if self._match(TokenType.OR): 2314 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2315 2316 self._match(TokenType.INTO) 2317 comments += ensure_list(self._prev_comments) 2318 self._match(TokenType.TABLE) 2319 is_function = self._match(TokenType.FUNCTION) 2320 2321 this = ( 2322 self._parse_table(schema=True, parse_partition=True) 2323 if not is_function 2324 else self._parse_function() 2325 ) 2326 2327 returning = self._parse_returning() 2328 2329 return self.expression( 2330 exp.Insert, 2331 comments=comments, 2332 hint=hint, 2333 is_function=is_function, 2334 this=this, 2335 stored=self._match_text_seq("STORED") and self._parse_stored(), 2336 by_name=self._match_text_seq("BY", "NAME"), 2337 exists=self._parse_exists(), 2338 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 2339 and self._parse_conjunction(), 2340 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2341 conflict=self._parse_on_conflict(), 2342 returning=returning or self._parse_returning(), 2343 overwrite=overwrite, 2344 alternative=alternative, 2345 ignore=ignore, 2346 ) 2347 2348 def _parse_kill(self) -> exp.Kill: 2349 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2350 2351 return self.expression( 2352 exp.Kill, 2353 this=self._parse_primary(), 2354 kind=kind, 2355 ) 2356 2357 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2358 conflict = self._match_text_seq("ON", "CONFLICT") 2359 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2360 2361 if not conflict and not duplicate: 2362 return None 2363 2364 conflict_keys = None 2365 constraint = None 2366 2367 if conflict: 2368 if self._match_text_seq("ON", "CONSTRAINT"): 2369 constraint = self._parse_id_var() 2370 elif self._match(TokenType.L_PAREN): 2371 conflict_keys = self._parse_csv(self._parse_id_var) 2372 self._match_r_paren() 2373 2374 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2375 if self._prev.token_type == TokenType.UPDATE: 2376 self._match(TokenType.SET) 2377 expressions = self._parse_csv(self._parse_equality) 2378 else: 2379 expressions = None 
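        # Roughly, the two statement shapes that reach this return (the column
        # names are illustrative, not part of the grammar):
        #   INSERT ... ON CONFLICT (id) DO UPDATE SET x = 1    -- Postgres
        #     -> conflict_keys=[id], action=DO UPDATE, expressions=[x = 1]
        #   INSERT ... ON DUPLICATE KEY UPDATE x = 1           -- MySQL
        #     -> duplicate=True, action=UPDATE, expressions=[x = 1]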
2380 2381 return self.expression( 2382 exp.OnConflict, 2383 duplicate=duplicate, 2384 expressions=expressions, 2385 action=action, 2386 conflict_keys=conflict_keys, 2387 constraint=constraint, 2388 ) 2389 2390 def _parse_returning(self) -> t.Optional[exp.Returning]: 2391 if not self._match(TokenType.RETURNING): 2392 return None 2393 return self.expression( 2394 exp.Returning, 2395 expressions=self._parse_csv(self._parse_expression), 2396 into=self._match(TokenType.INTO) and self._parse_table_part(), 2397 ) 2398 2399 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2400 if not self._match(TokenType.FORMAT): 2401 return None 2402 return self._parse_row_format() 2403 2404 def _parse_row_format( 2405 self, match_row: bool = False 2406 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2407 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2408 return None 2409 2410 if self._match_text_seq("SERDE"): 2411 this = self._parse_string() 2412 2413 serde_properties = None 2414 if self._match(TokenType.SERDE_PROPERTIES): 2415 serde_properties = self.expression( 2416 exp.SerdeProperties, expressions=self._parse_wrapped_properties() 2417 ) 2418 2419 return self.expression( 2420 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2421 ) 2422 2423 self._match_text_seq("DELIMITED") 2424 2425 kwargs = {} 2426 2427 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2428 kwargs["fields"] = self._parse_string() 2429 if self._match_text_seq("ESCAPED", "BY"): 2430 kwargs["escaped"] = self._parse_string() 2431 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2432 kwargs["collection_items"] = self._parse_string() 2433 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2434 kwargs["map_keys"] = self._parse_string() 2435 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2436 kwargs["lines"] = self._parse_string() 2437 if self._match_text_seq("NULL", "DEFINED", "AS"): 2438 kwargs["null"] = self._parse_string() 2439 2440 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2441 2442 def _parse_load(self) -> exp.LoadData | exp.Command: 2443 if self._match_text_seq("DATA"): 2444 local = self._match_text_seq("LOCAL") 2445 self._match_text_seq("INPATH") 2446 inpath = self._parse_string() 2447 overwrite = self._match(TokenType.OVERWRITE) 2448 self._match_pair(TokenType.INTO, TokenType.TABLE) 2449 2450 return self.expression( 2451 exp.LoadData, 2452 this=self._parse_table(schema=True), 2453 local=local, 2454 overwrite=overwrite, 2455 inpath=inpath, 2456 partition=self._parse_partition(), 2457 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2458 serde=self._match_text_seq("SERDE") and self._parse_string(), 2459 ) 2460 return self._parse_as_command(self._prev) 2461 2462 def _parse_delete(self) -> exp.Delete: 2463 # This handles MySQL's "Multiple-Table Syntax" 2464 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2465 tables = None 2466 comments = self._prev_comments 2467 if not self._match(TokenType.FROM, advance=False): 2468 tables = self._parse_csv(self._parse_table) or None 2469 2470 returning = self._parse_returning() 2471 2472 return self.expression( 2473 exp.Delete, 2474 comments=comments, 2475 tables=tables, 2476 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2477 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2478 where=self._parse_where(), 2479 returning=returning 
or self._parse_returning(), 2480 limit=self._parse_limit(), 2481 ) 2482 2483 def _parse_update(self) -> exp.Update: 2484 comments = self._prev_comments 2485 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2486 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2487 returning = self._parse_returning() 2488 return self.expression( 2489 exp.Update, 2490 comments=comments, 2491 **{ # type: ignore 2492 "this": this, 2493 "expressions": expressions, 2494 "from": self._parse_from(joins=True), 2495 "where": self._parse_where(), 2496 "returning": returning or self._parse_returning(), 2497 "order": self._parse_order(), 2498 "limit": self._parse_limit(), 2499 }, 2500 ) 2501 2502 def _parse_uncache(self) -> exp.Uncache: 2503 if not self._match(TokenType.TABLE): 2504 self.raise_error("Expecting TABLE after UNCACHE") 2505 2506 return self.expression( 2507 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2508 ) 2509 2510 def _parse_cache(self) -> exp.Cache: 2511 lazy = self._match_text_seq("LAZY") 2512 self._match(TokenType.TABLE) 2513 table = self._parse_table(schema=True) 2514 2515 options = [] 2516 if self._match_text_seq("OPTIONS"): 2517 self._match_l_paren() 2518 k = self._parse_string() 2519 self._match(TokenType.EQ) 2520 v = self._parse_string() 2521 options = [k, v] 2522 self._match_r_paren() 2523 2524 self._match(TokenType.ALIAS) 2525 return self.expression( 2526 exp.Cache, 2527 this=table, 2528 lazy=lazy, 2529 options=options, 2530 expression=self._parse_select(nested=True), 2531 ) 2532 2533 def _parse_partition(self) -> t.Optional[exp.Partition]: 2534 if not self._match(TokenType.PARTITION): 2535 return None 2536 2537 return self.expression( 2538 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2539 ) 2540 2541 def _parse_value(self) -> t.Optional[exp.Tuple]: 2542 if self._match(TokenType.L_PAREN): 2543 expressions = self._parse_csv(self._parse_expression) 2544 self._match_r_paren() 2545 return self.expression(exp.Tuple, expressions=expressions) 2546 2547 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
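        # A doctest-style sketch of both shapes (parse_one is sqlglot's public
        # entry point; the resulting reprs are elided here):
        #   >>> from sqlglot import parse_one
        #   >>> parse_one("SELECT * FROM (VALUES (1, 2))")  # one row, two columns
        #   >>> parse_one("SELECT * FROM (VALUES 1, 2)")    # two rows, one column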
2548 expression = self._parse_expression() 2549 if expression: 2550 return self.expression(exp.Tuple, expressions=[expression]) 2551 return None 2552 2553 def _parse_projections(self) -> t.List[exp.Expression]: 2554 return self._parse_expressions() 2555 2556 def _parse_select( 2557 self, 2558 nested: bool = False, 2559 table: bool = False, 2560 parse_subquery_alias: bool = True, 2561 parse_set_operation: bool = True, 2562 ) -> t.Optional[exp.Expression]: 2563 cte = self._parse_with() 2564 2565 if cte: 2566 this = self._parse_statement() 2567 2568 if not this: 2569 self.raise_error("Failed to parse any statement following CTE") 2570 return cte 2571 2572 if "with" in this.arg_types: 2573 this.set("with", cte) 2574 else: 2575 self.raise_error(f"{this.key} does not support CTE") 2576 this = cte 2577 2578 return this 2579 2580 # duckdb supports leading with FROM x 2581 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2582 2583 if self._match(TokenType.SELECT): 2584 comments = self._prev_comments 2585 2586 hint = self._parse_hint() 2587 all_ = self._match(TokenType.ALL) 2588 distinct = self._match_set(self.DISTINCT_TOKENS) 2589 2590 kind = ( 2591 self._match(TokenType.ALIAS) 2592 and self._match_texts(("STRUCT", "VALUE")) 2593 and self._prev.text.upper() 2594 ) 2595 2596 if distinct: 2597 distinct = self.expression( 2598 exp.Distinct, 2599 on=self._parse_value() if self._match(TokenType.ON) else None, 2600 ) 2601 2602 if all_ and distinct: 2603 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2604 2605 limit = self._parse_limit(top=True) 2606 projections = self._parse_projections() 2607 2608 this = self.expression( 2609 exp.Select, 2610 kind=kind, 2611 hint=hint, 2612 distinct=distinct, 2613 expressions=projections, 2614 limit=limit, 2615 ) 2616 this.comments = comments 2617 2618 into = self._parse_into() 2619 if into: 2620 this.set("into", into) 2621 2622 if not from_: 2623 from_ = self._parse_from() 2624 2625 if from_: 2626 this.set("from", from_) 2627 2628 this = self._parse_query_modifiers(this) 2629 elif (table or nested) and self._match(TokenType.L_PAREN): 2630 if self._match(TokenType.PIVOT): 2631 this = self._parse_simplified_pivot() 2632 elif self._match(TokenType.FROM): 2633 this = exp.select("*").from_( 2634 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2635 ) 2636 else: 2637 this = ( 2638 self._parse_table() 2639 if table 2640 else self._parse_select(nested=True, parse_set_operation=False) 2641 ) 2642 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2643 2644 self._match_r_paren() 2645 2646 # We return early here so that the UNION isn't attached to the subquery by the 2647 # following call to _parse_set_operations, but instead becomes the parent node 2648 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2649 elif self._match(TokenType.VALUES, advance=False): 2650 this = self._parse_derived_table_values() 2651 elif from_: 2652 this = exp.select("*").from_(from_.this, copy=False) 2653 else: 2654 this = None 2655 2656 if parse_set_operation: 2657 return self._parse_set_operations(this) 2658 return this 2659 2660 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2661 if not skip_with_token and not self._match(TokenType.WITH): 2662 return None 2663 2664 comments = self._prev_comments 2665 recursive = self._match(TokenType.RECURSIVE) 2666 2667 expressions = [] 2668 while True: 2669 expressions.append(self._parse_cte()) 2670 2671 if not 
self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2672 break 2673 else: 2674 self._match(TokenType.WITH) 2675 2676 return self.expression( 2677 exp.With, comments=comments, expressions=expressions, recursive=recursive 2678 ) 2679 2680 def _parse_cte(self) -> exp.CTE: 2681 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2682 if not alias or not alias.this: 2683 self.raise_error("Expected CTE to have alias") 2684 2685 self._match(TokenType.ALIAS) 2686 2687 if self._match_text_seq("NOT", "MATERIALIZED"): 2688 materialized = False 2689 elif self._match_text_seq("MATERIALIZED"): 2690 materialized = True 2691 else: 2692 materialized = None 2693 2694 return self.expression( 2695 exp.CTE, 2696 this=self._parse_wrapped(self._parse_statement), 2697 alias=alias, 2698 materialized=materialized, 2699 ) 2700 2701 def _parse_table_alias( 2702 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2703 ) -> t.Optional[exp.TableAlias]: 2704 any_token = self._match(TokenType.ALIAS) 2705 alias = ( 2706 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2707 or self._parse_string_as_identifier() 2708 ) 2709 2710 index = self._index 2711 if self._match(TokenType.L_PAREN): 2712 columns = self._parse_csv(self._parse_function_parameter) 2713 self._match_r_paren() if columns else self._retreat(index) 2714 else: 2715 columns = None 2716 2717 if not alias and not columns: 2718 return None 2719 2720 return self.expression(exp.TableAlias, this=alias, columns=columns) 2721 2722 def _parse_subquery( 2723 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2724 ) -> t.Optional[exp.Subquery]: 2725 if not this: 2726 return None 2727 2728 return self.expression( 2729 exp.Subquery, 2730 this=this, 2731 pivots=self._parse_pivots(), 2732 alias=self._parse_table_alias() if parse_alias else None, 2733 ) 2734 2735 def _implicit_unnests_to_explicit(self, this: E) -> E: 2736 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 2737 2738 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2739 for i, join in enumerate(this.args.get("joins") or []): 2740 table = join.this 2741 normalized_table = table.copy() 2742 normalized_table.meta["maybe_column"] = True 2743 normalized_table = _norm(normalized_table, dialect=self.dialect) 2744 2745 if isinstance(table, exp.Table) and not join.args.get("on"): 2746 if normalized_table.parts[0].name in refs: 2747 table_as_column = table.to_column() 2748 unnest = exp.Unnest(expressions=[table_as_column]) 2749 2750 # Table.to_column creates a parent Alias node that we want to convert to 2751 # a TableAlias and attach to the Unnest, so it matches the parser's output 2752 if isinstance(table.args.get("alias"), exp.TableAlias): 2753 table_as_column.replace(table_as_column.this) 2754 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2755 2756 table.replace(unnest) 2757 2758 refs.add(normalized_table.alias_or_name) 2759 2760 return this 2761 2762 def _parse_query_modifiers( 2763 self, this: t.Optional[exp.Expression] 2764 ) -> t.Optional[exp.Expression]: 2765 if isinstance(this, (exp.Query, exp.Table)): 2766 for join in self._parse_joins(): 2767 this.append("joins", join) 2768 for lateral in iter(self._parse_lateral, None): 2769 this.append("laterals", lateral) 2770 2771 while True: 2772 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2773 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2774 key, expression = parser(self) 
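                # The limit branch below normalizes comma offsets, e.g. MySQL's
                # LIMIT 5, 10: the embedded offset is popped off the Limit node
                # and re-attached as a standalone exp.Offset, and any LIMIT ...
                # BY expressions (ClickHouse) are moved onto that Offset too.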
2775 2776 if expression: 2777 this.set(key, expression) 2778 if key == "limit": 2779 offset = expression.args.pop("offset", None) 2780 2781 if offset: 2782 offset = exp.Offset(expression=offset) 2783 this.set("offset", offset) 2784 2785 limit_by_expressions = expression.expressions 2786 expression.set("expressions", None) 2787 offset.set("expressions", limit_by_expressions) 2788 continue 2789 break 2790 2791 if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args: 2792 this = self._implicit_unnests_to_explicit(this) 2793 2794 return this 2795 2796 def _parse_hint(self) -> t.Optional[exp.Hint]: 2797 if self._match(TokenType.HINT): 2798 hints = [] 2799 for hint in iter( 2800 lambda: self._parse_csv( 2801 lambda: self._parse_function() or self._parse_var(upper=True) 2802 ), 2803 [], 2804 ): 2805 hints.extend(hint) 2806 2807 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2808 self.raise_error("Expected */ after HINT") 2809 2810 return self.expression(exp.Hint, expressions=hints) 2811 2812 return None 2813 2814 def _parse_into(self) -> t.Optional[exp.Into]: 2815 if not self._match(TokenType.INTO): 2816 return None 2817 2818 temp = self._match(TokenType.TEMPORARY) 2819 unlogged = self._match_text_seq("UNLOGGED") 2820 self._match(TokenType.TABLE) 2821 2822 return self.expression( 2823 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2824 ) 2825 2826 def _parse_from( 2827 self, joins: bool = False, skip_from_token: bool = False 2828 ) -> t.Optional[exp.From]: 2829 if not skip_from_token and not self._match(TokenType.FROM): 2830 return None 2831 2832 return self.expression( 2833 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2834 ) 2835 2836 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 2837 return self.expression( 2838 exp.MatchRecognizeMeasure, 2839 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 2840 this=self._parse_expression(), 2841 ) 2842 2843 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2844 if not self._match(TokenType.MATCH_RECOGNIZE): 2845 return None 2846 2847 self._match_l_paren() 2848 2849 partition = self._parse_partition_by() 2850 order = self._parse_order() 2851 2852 measures = ( 2853 self._parse_csv(self._parse_match_recognize_measure) 2854 if self._match_text_seq("MEASURES") 2855 else None 2856 ) 2857 2858 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2859 rows = exp.var("ONE ROW PER MATCH") 2860 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2861 text = "ALL ROWS PER MATCH" 2862 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2863 text += " SHOW EMPTY MATCHES" 2864 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2865 text += " OMIT EMPTY MATCHES" 2866 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2867 text += " WITH UNMATCHED ROWS" 2868 rows = exp.var(text) 2869 else: 2870 rows = None 2871 2872 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2873 text = "AFTER MATCH SKIP" 2874 if self._match_text_seq("PAST", "LAST", "ROW"): 2875 text += " PAST LAST ROW" 2876 elif self._match_text_seq("TO", "NEXT", "ROW"): 2877 text += " TO NEXT ROW" 2878 elif self._match_text_seq("TO", "FIRST"): 2879 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2880 elif self._match_text_seq("TO", "LAST"): 2881 text += f" TO LAST {self._advance_any().text}" # type: ignore 2882 after = exp.var(text) 2883 else: 2884 after = None 2885 2886 if self._match_text_seq("PATTERN"): 2887 
self._match_l_paren() 2888 2889 if not self._curr: 2890 self.raise_error("Expecting )", self._curr) 2891 2892 paren = 1 2893 start = self._curr 2894 2895 while self._curr and paren > 0: 2896 if self._curr.token_type == TokenType.L_PAREN: 2897 paren += 1 2898 if self._curr.token_type == TokenType.R_PAREN: 2899 paren -= 1 2900 2901 end = self._prev 2902 self._advance() 2903 2904 if paren > 0: 2905 self.raise_error("Expecting )", self._curr) 2906 2907 pattern = exp.var(self._find_sql(start, end)) 2908 else: 2909 pattern = None 2910 2911 define = ( 2912 self._parse_csv(self._parse_name_as_expression) 2913 if self._match_text_seq("DEFINE") 2914 else None 2915 ) 2916 2917 self._match_r_paren() 2918 2919 return self.expression( 2920 exp.MatchRecognize, 2921 partition_by=partition, 2922 order=order, 2923 measures=measures, 2924 rows=rows, 2925 after=after, 2926 pattern=pattern, 2927 define=define, 2928 alias=self._parse_table_alias(), 2929 ) 2930 2931 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2932 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2933 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 2934 cross_apply = False 2935 2936 if cross_apply is not None: 2937 this = self._parse_select(table=True) 2938 view = None 2939 outer = None 2940 elif self._match(TokenType.LATERAL): 2941 this = self._parse_select(table=True) 2942 view = self._match(TokenType.VIEW) 2943 outer = self._match(TokenType.OUTER) 2944 else: 2945 return None 2946 2947 if not this: 2948 this = ( 2949 self._parse_unnest() 2950 or self._parse_function() 2951 or self._parse_id_var(any_token=False) 2952 ) 2953 2954 while self._match(TokenType.DOT): 2955 this = exp.Dot( 2956 this=this, 2957 expression=self._parse_function() or self._parse_id_var(any_token=False), 2958 ) 2959 2960 if view: 2961 table = self._parse_id_var(any_token=False) 2962 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2963 table_alias: t.Optional[exp.TableAlias] = self.expression( 2964 exp.TableAlias, this=table, columns=columns 2965 ) 2966 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 2967 # We move the alias from the lateral's child node to the lateral itself 2968 table_alias = this.args["alias"].pop() 2969 else: 2970 table_alias = self._parse_table_alias() 2971 2972 return self.expression( 2973 exp.Lateral, 2974 this=this, 2975 view=view, 2976 outer=outer, 2977 alias=table_alias, 2978 cross_apply=cross_apply, 2979 ) 2980 2981 def _parse_join_parts( 2982 self, 2983 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2984 return ( 2985 self._match_set(self.JOIN_METHODS) and self._prev, 2986 self._match_set(self.JOIN_SIDES) and self._prev, 2987 self._match_set(self.JOIN_KINDS) and self._prev, 2988 ) 2989 2990 def _parse_join( 2991 self, skip_join_token: bool = False, parse_bracket: bool = False 2992 ) -> t.Optional[exp.Join]: 2993 if self._match(TokenType.COMMA): 2994 return self.expression(exp.Join, this=self._parse_table()) 2995 2996 index = self._index 2997 method, side, kind = self._parse_join_parts() 2998 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2999 join = self._match(TokenType.JOIN) 3000 3001 if not skip_join_token and not join: 3002 self._retreat(index) 3003 kind = None 3004 method = None 3005 side = None 3006 3007 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3008 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3009 3010 if not skip_join_token and 
not join and not outer_apply and not cross_apply: 3011 return None 3012 3013 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3014 3015 if method: 3016 kwargs["method"] = method.text 3017 if side: 3018 kwargs["side"] = side.text 3019 if kind: 3020 kwargs["kind"] = kind.text 3021 if hint: 3022 kwargs["hint"] = hint 3023 3024 if self._match(TokenType.MATCH_CONDITION): 3025 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3026 3027 if self._match(TokenType.ON): 3028 kwargs["on"] = self._parse_conjunction() 3029 elif self._match(TokenType.USING): 3030 kwargs["using"] = self._parse_wrapped_id_vars() 3031 elif not isinstance(kwargs["this"], exp.Unnest) and not ( 3032 kind and kind.token_type == TokenType.CROSS 3033 ): 3034 index = self._index 3035 joins: t.Optional[list] = list(self._parse_joins()) 3036 3037 if joins and self._match(TokenType.ON): 3038 kwargs["on"] = self._parse_conjunction() 3039 elif joins and self._match(TokenType.USING): 3040 kwargs["using"] = self._parse_wrapped_id_vars() 3041 else: 3042 joins = None 3043 self._retreat(index) 3044 3045 kwargs["this"].set("joins", joins if joins else None) 3046 3047 comments = [c for token in (method, side, kind) if token for c in token.comments] 3048 return self.expression(exp.Join, comments=comments, **kwargs) 3049 3050 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3051 this = self._parse_conjunction() 3052 3053 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3054 return this 3055 3056 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3057 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3058 3059 return this 3060 3061 def _parse_index_params(self) -> exp.IndexParameters: 3062 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3063 3064 if self._match(TokenType.L_PAREN, advance=False): 3065 columns = self._parse_wrapped_csv(self._parse_with_operator) 3066 else: 3067 columns = None 3068 3069 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3070 partition_by = self._parse_partition_by() 3071 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3072 tablespace = ( 3073 self._parse_var(any_token=True) 3074 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3075 else None 3076 ) 3077 where = self._parse_where() 3078 3079 return self.expression( 3080 exp.IndexParameters, 3081 using=using, 3082 columns=columns, 3083 include=include, 3084 partition_by=partition_by, 3085 where=where, 3086 with_storage=with_storage, 3087 tablespace=tablespace, 3088 ) 3089 3090 def _parse_index( 3091 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3092 ) -> t.Optional[exp.Index]: 3093 if index or anonymous: 3094 unique = None 3095 primary = None 3096 amp = None 3097 3098 self._match(TokenType.ON) 3099 self._match(TokenType.TABLE) # hive 3100 table = self._parse_table_parts(schema=True) 3101 else: 3102 unique = self._match(TokenType.UNIQUE) 3103 primary = self._match_text_seq("PRIMARY") 3104 amp = self._match_text_seq("AMP") 3105 3106 if not self._match(TokenType.INDEX): 3107 return None 3108 3109 index = self._parse_id_var() 3110 table = None 3111 3112 params = self._parse_index_params() 3113 3114 return self.expression( 3115 exp.Index, 3116 this=index, 3117 table=table, 3118 unique=unique, 3119 primary=primary, 3120 amp=amp, 3121 params=params, 3122 ) 3123 3124 def _parse_table_hints(self) -> 
t.Optional[t.List[exp.Expression]]: 3125 hints: t.List[exp.Expression] = [] 3126 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3127 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3128 hints.append( 3129 self.expression( 3130 exp.WithTableHint, 3131 expressions=self._parse_csv( 3132 lambda: self._parse_function() or self._parse_var(any_token=True) 3133 ), 3134 ) 3135 ) 3136 self._match_r_paren() 3137 else: 3138 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3139 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3140 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3141 3142 self._match_texts(("INDEX", "KEY")) 3143 if self._match(TokenType.FOR): 3144 hint.set("target", self._advance_any() and self._prev.text.upper()) 3145 3146 hint.set("expressions", self._parse_wrapped_id_vars()) 3147 hints.append(hint) 3148 3149 return hints or None 3150 3151 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3152 return ( 3153 (not schema and self._parse_function(optional_parens=False)) 3154 or self._parse_id_var(any_token=False) 3155 or self._parse_string_as_identifier() 3156 or self._parse_placeholder() 3157 ) 3158 3159 def _parse_table_parts( 3160 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3161 ) -> exp.Table: 3162 catalog = None 3163 db = None 3164 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3165 3166 while self._match(TokenType.DOT): 3167 if catalog: 3168 # This allows nesting the table in arbitrarily many dot expressions if needed 3169 table = self.expression( 3170 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3171 ) 3172 else: 3173 catalog = db 3174 db = table 3175 # "" used for tsql FROM a..b case 3176 table = self._parse_table_part(schema=schema) or "" 3177 3178 if ( 3179 wildcard 3180 and self._is_connected() 3181 and (isinstance(table, exp.Identifier) or not table) 3182 and self._match(TokenType.STAR) 3183 ): 3184 if isinstance(table, exp.Identifier): 3185 table.args["this"] += "*" 3186 else: 3187 table = exp.Identifier(this="*") 3188 3189 # We bubble up comments from the Identifier to the Table 3190 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3191 3192 if is_db_reference: 3193 catalog = db 3194 db = table 3195 table = None 3196 3197 if not table and not is_db_reference: 3198 self.raise_error(f"Expected table name but got {self._curr}") 3199 if not db and is_db_reference: 3200 self.raise_error(f"Expected database name but got {self._curr}") 3201 3202 return self.expression( 3203 exp.Table, 3204 comments=comments, 3205 this=table, 3206 db=db, 3207 catalog=catalog, 3208 pivots=self._parse_pivots(), 3209 ) 3210 3211 def _parse_table( 3212 self, 3213 schema: bool = False, 3214 joins: bool = False, 3215 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3216 parse_bracket: bool = False, 3217 is_db_reference: bool = False, 3218 parse_partition: bool = False, 3219 ) -> t.Optional[exp.Expression]: 3220 lateral = self._parse_lateral() 3221 if lateral: 3222 return lateral 3223 3224 unnest = self._parse_unnest() 3225 if unnest: 3226 return unnest 3227 3228 values = self._parse_derived_table_values() 3229 if values: 3230 return values 3231 3232 subquery = self._parse_select(table=True) 3233 if subquery: 3234 if not subquery.args.get("pivots"): 3235 subquery.set("pivots", self._parse_pivots()) 3236 return subquery 3237 3238 bracket = parse_bracket and 
self._parse_bracket(None) 3239 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3240 3241 only = self._match(TokenType.ONLY) 3242 3243 this = t.cast( 3244 exp.Expression, 3245 bracket 3246 or self._parse_bracket( 3247 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3248 ), 3249 ) 3250 3251 if only: 3252 this.set("only", only) 3253 3254 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3255 self._match_text_seq("*") 3256 3257 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3258 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3259 this.set("partition", self._parse_partition()) 3260 3261 if schema: 3262 return self._parse_schema(this=this) 3263 3264 version = self._parse_version() 3265 3266 if version: 3267 this.set("version", version) 3268 3269 if self.dialect.ALIAS_POST_TABLESAMPLE: 3270 table_sample = self._parse_table_sample() 3271 3272 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3273 if alias: 3274 this.set("alias", alias) 3275 3276 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3277 return self.expression( 3278 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3279 ) 3280 3281 this.set("hints", self._parse_table_hints()) 3282 3283 if not this.args.get("pivots"): 3284 this.set("pivots", self._parse_pivots()) 3285 3286 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3287 table_sample = self._parse_table_sample() 3288 3289 if table_sample: 3290 table_sample.set("this", this) 3291 this = table_sample 3292 3293 if joins: 3294 for join in self._parse_joins(): 3295 this.append("joins", join) 3296 3297 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3298 this.set("ordinality", True) 3299 this.set("alias", self._parse_table_alias()) 3300 3301 return this 3302 3303 def _parse_version(self) -> t.Optional[exp.Version]: 3304 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3305 this = "TIMESTAMP" 3306 elif self._match(TokenType.VERSION_SNAPSHOT): 3307 this = "VERSION" 3308 else: 3309 return None 3310 3311 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3312 kind = self._prev.text.upper() 3313 start = self._parse_bitwise() 3314 self._match_texts(("TO", "AND")) 3315 end = self._parse_bitwise() 3316 expression: t.Optional[exp.Expression] = self.expression( 3317 exp.Tuple, expressions=[start, end] 3318 ) 3319 elif self._match_text_seq("CONTAINED", "IN"): 3320 kind = "CONTAINED IN" 3321 expression = self.expression( 3322 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3323 ) 3324 elif self._match(TokenType.ALL): 3325 kind = "ALL" 3326 expression = None 3327 else: 3328 self._match_text_seq("AS", "OF") 3329 kind = "AS OF" 3330 expression = self._parse_type() 3331 3332 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3333 3334 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3335 if not self._match(TokenType.UNNEST): 3336 return None 3337 3338 expressions = self._parse_wrapped_csv(self._parse_equality) 3339 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3340 3341 alias = self._parse_table_alias() if with_alias else None 3342 3343 if alias: 3344 if self.dialect.UNNEST_COLUMN_ONLY: 3345 if alias.args.get("columns"): 3346 self.raise_error("Unexpected extra column alias in unnest.") 3347 3348 alias.set("columns", [alias.this]) 3349 alias.set("this", None) 3350 3351 columns = 
alias.args.get("columns") or [] 3352 if offset and len(expressions) < len(columns): 3353 offset = columns.pop() 3354 3355 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3356 self._match(TokenType.ALIAS) 3357 offset = self._parse_id_var( 3358 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3359 ) or exp.to_identifier("offset") 3360 3361 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3362 3363 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3364 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3365 if not is_derived and not self._match_text_seq("VALUES"): 3366 return None 3367 3368 expressions = self._parse_csv(self._parse_value) 3369 alias = self._parse_table_alias() 3370 3371 if is_derived: 3372 self._match_r_paren() 3373 3374 return self.expression( 3375 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3376 ) 3377 3378 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3379 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3380 as_modifier and self._match_text_seq("USING", "SAMPLE") 3381 ): 3382 return None 3383 3384 bucket_numerator = None 3385 bucket_denominator = None 3386 bucket_field = None 3387 percent = None 3388 size = None 3389 seed = None 3390 3391 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3392 matched_l_paren = self._match(TokenType.L_PAREN) 3393 3394 if self.TABLESAMPLE_CSV: 3395 num = None 3396 expressions = self._parse_csv(self._parse_primary) 3397 else: 3398 expressions = None 3399 num = ( 3400 self._parse_factor() 3401 if self._match(TokenType.NUMBER, advance=False) 3402 else self._parse_primary() or self._parse_placeholder() 3403 ) 3404 3405 if self._match_text_seq("BUCKET"): 3406 bucket_numerator = self._parse_number() 3407 self._match_text_seq("OUT", "OF") 3408 bucket_denominator = self._parse_number() 3409 self._match(TokenType.ON) 3410 bucket_field = self._parse_field() 3411 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3412 percent = num 3413 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3414 size = num 3415 else: 3416 percent = num 3417 3418 if matched_l_paren: 3419 self._match_r_paren() 3420 3421 if self._match(TokenType.L_PAREN): 3422 method = self._parse_var(upper=True) 3423 seed = self._match(TokenType.COMMA) and self._parse_number() 3424 self._match_r_paren() 3425 elif self._match_texts(("SEED", "REPEATABLE")): 3426 seed = self._parse_wrapped(self._parse_number) 3427 3428 if not method and self.DEFAULT_SAMPLING_METHOD: 3429 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3430 3431 return self.expression( 3432 exp.TableSample, 3433 expressions=expressions, 3434 method=method, 3435 bucket_numerator=bucket_numerator, 3436 bucket_denominator=bucket_denominator, 3437 bucket_field=bucket_field, 3438 percent=percent, 3439 size=size, 3440 seed=seed, 3441 ) 3442 3443 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3444 return list(iter(self._parse_pivot, None)) or None 3445 3446 def _parse_joins(self) -> t.Iterator[exp.Join]: 3447 return iter(self._parse_join, None) 3448 3449 # https://duckdb.org/docs/sql/statements/pivot 3450 def _parse_simplified_pivot(self) -> exp.Pivot: 3451 def _parse_on() -> t.Optional[exp.Expression]: 3452 this = self._parse_bitwise() 3453 return self._parse_in(this) if self._match(TokenType.IN) else this 3454 3455 this = self._parse_table() 3456 expressions =
self._match(TokenType.ON) and self._parse_csv(_parse_on) 3457 using = self._match(TokenType.USING) and self._parse_csv( 3458 lambda: self._parse_alias(self._parse_function()) 3459 ) 3460 group = self._parse_group() 3461 return self.expression( 3462 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3463 ) 3464 3465 def _parse_pivot_in(self) -> exp.In: 3466 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3467 this = self._parse_conjunction() 3468 3469 self._match(TokenType.ALIAS) 3470 alias = self._parse_field() 3471 if alias: 3472 return self.expression(exp.PivotAlias, this=this, alias=alias) 3473 3474 return this 3475 3476 value = self._parse_column() 3477 3478 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3479 self.raise_error("Expecting IN (") 3480 3481 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3482 3483 self._match_r_paren() 3484 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3485 3486 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3487 index = self._index 3488 include_nulls = None 3489 3490 if self._match(TokenType.PIVOT): 3491 unpivot = False 3492 elif self._match(TokenType.UNPIVOT): 3493 unpivot = True 3494 3495 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3496 if self._match_text_seq("INCLUDE", "NULLS"): 3497 include_nulls = True 3498 elif self._match_text_seq("EXCLUDE", "NULLS"): 3499 include_nulls = False 3500 else: 3501 return None 3502 3503 expressions = [] 3504 3505 if not self._match(TokenType.L_PAREN): 3506 self._retreat(index) 3507 return None 3508 3509 if unpivot: 3510 expressions = self._parse_csv(self._parse_column) 3511 else: 3512 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3513 3514 if not expressions: 3515 self.raise_error("Failed to parse PIVOT's aggregation list") 3516 3517 if not self._match(TokenType.FOR): 3518 self.raise_error("Expecting FOR") 3519 3520 field = self._parse_pivot_in() 3521 3522 self._match_r_paren() 3523 3524 pivot = self.expression( 3525 exp.Pivot, 3526 expressions=expressions, 3527 field=field, 3528 unpivot=unpivot, 3529 include_nulls=include_nulls, 3530 ) 3531 3532 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3533 pivot.set("alias", self._parse_table_alias()) 3534 3535 if not unpivot: 3536 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3537 3538 columns: t.List[exp.Expression] = [] 3539 for fld in pivot.args["field"].expressions: 3540 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3541 for name in names: 3542 if self.PREFIXED_PIVOT_COLUMNS: 3543 name = f"{name}_{field_name}" if name else field_name 3544 else: 3545 name = f"{field_name}_{name}" if name else field_name 3546 3547 columns.append(exp.to_identifier(name)) 3548 3549 pivot.set("columns", columns) 3550 3551 return pivot 3552 3553 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3554 return [agg.alias for agg in aggregations] 3555 3556 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3557 if not skip_where_token and not self._match(TokenType.PREWHERE): 3558 return None 3559 3560 return self.expression( 3561 exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction() 3562 ) 3563 3564 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3565 if not skip_where_token and not 
self._match(TokenType.WHERE): 3566 return None 3567 3568 return self.expression( 3569 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 3570 ) 3571 3572 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3573 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3574 return None 3575 3576 elements: t.Dict[str, t.Any] = defaultdict(list) 3577 3578 if self._match(TokenType.ALL): 3579 elements["all"] = True 3580 elif self._match(TokenType.DISTINCT): 3581 elements["all"] = False 3582 3583 while True: 3584 expressions = self._parse_csv( 3585 lambda: None 3586 if self._match(TokenType.ROLLUP, advance=False) 3587 else self._parse_conjunction() 3588 ) 3589 if expressions: 3590 elements["expressions"].extend(expressions) 3591 3592 grouping_sets = self._parse_grouping_sets() 3593 if grouping_sets: 3594 elements["grouping_sets"].extend(grouping_sets) 3595 3596 rollup = None 3597 cube = None 3598 totals = None 3599 3600 index = self._index 3601 with_ = self._match(TokenType.WITH) 3602 if self._match(TokenType.ROLLUP): 3603 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3604 elements["rollup"].extend(ensure_list(rollup)) 3605 3606 if self._match(TokenType.CUBE): 3607 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3608 elements["cube"].extend(ensure_list(cube)) 3609 3610 if self._match_text_seq("TOTALS"): 3611 totals = True 3612 elements["totals"] = True # type: ignore 3613 3614 if not (grouping_sets or rollup or cube or totals): 3615 if with_: 3616 self._retreat(index) 3617 break 3618 3619 return self.expression(exp.Group, **elements) # type: ignore 3620 3621 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3622 if not self._match(TokenType.GROUPING_SETS): 3623 return None 3624 3625 return self._parse_wrapped_csv(self._parse_grouping_set) 3626 3627 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3628 if self._match(TokenType.L_PAREN): 3629 grouping_set = self._parse_csv(self._parse_column) 3630 self._match_r_paren() 3631 return self.expression(exp.Tuple, expressions=grouping_set) 3632 3633 return self._parse_column() 3634 3635 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3636 if not skip_having_token and not self._match(TokenType.HAVING): 3637 return None 3638 return self.expression(exp.Having, this=self._parse_conjunction()) 3639 3640 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3641 if not self._match(TokenType.QUALIFY): 3642 return None 3643 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3644 3645 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3646 if skip_start_token: 3647 start = None 3648 elif self._match(TokenType.START_WITH): 3649 start = self._parse_conjunction() 3650 else: 3651 return None 3652 3653 self._match(TokenType.CONNECT_BY) 3654 nocycle = self._match_text_seq("NOCYCLE") 3655 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3656 exp.Prior, this=self._parse_bitwise() 3657 ) 3658 connect = self._parse_conjunction() 3659 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3660 3661 if not start and self._match(TokenType.START_WITH): 3662 start = self._parse_conjunction() 3663 3664 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3665 3666 def _parse_name_as_expression(self) -> exp.Alias: 3667 return self.expression( 3668 exp.Alias, 3669 alias=self._parse_id_var(any_token=True), 3670 
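# Illustrative usage sketch (added; not part of the original source). _parse_group
# collects plain expressions, GROUPING SETS, ROLLUP and CUBE into separate args of
# the exp.Group node:
#
#     import sqlglot
#
#     group = sqlglot.parse_one("SELECT a, SUM(b) FROM t GROUP BY ROLLUP (a)").find(
#         sqlglot.exp.Group
#     )
#     assert group.args.get("rollup")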
this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 3671 ) 3672 3673 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3674 if self._match_text_seq("INTERPOLATE"): 3675 return self._parse_wrapped_csv(self._parse_name_as_expression) 3676 return None 3677 3678 def _parse_order( 3679 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3680 ) -> t.Optional[exp.Expression]: 3681 siblings = None 3682 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3683 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3684 return this 3685 3686 siblings = True 3687 3688 return self.expression( 3689 exp.Order, 3690 this=this, 3691 expressions=self._parse_csv(self._parse_ordered), 3692 interpolate=self._parse_interpolate(), 3693 siblings=siblings, 3694 ) 3695 3696 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3697 if not self._match(token): 3698 return None 3699 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3700 3701 def _parse_ordered( 3702 self, parse_method: t.Optional[t.Callable] = None 3703 ) -> t.Optional[exp.Ordered]: 3704 this = parse_method() if parse_method else self._parse_conjunction() 3705 if not this: 3706 return None 3707 3708 asc = self._match(TokenType.ASC) 3709 desc = self._match(TokenType.DESC) or (asc and False) 3710 3711 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3712 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3713 3714 nulls_first = is_nulls_first or False 3715 explicitly_null_ordered = is_nulls_first or is_nulls_last 3716 3717 if ( 3718 not explicitly_null_ordered 3719 and ( 3720 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3721 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3722 ) 3723 and self.dialect.NULL_ORDERING != "nulls_are_last" 3724 ): 3725 nulls_first = True 3726 3727 if self._match_text_seq("WITH", "FILL"): 3728 with_fill = self.expression( 3729 exp.WithFill, 3730 **{ # type: ignore 3731 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3732 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3733 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3734 }, 3735 ) 3736 else: 3737 with_fill = None 3738 3739 return self.expression( 3740 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3741 ) 3742 3743 def _parse_limit( 3744 self, 3745 this: t.Optional[exp.Expression] = None, 3746 top: bool = False, 3747 skip_limit_token: bool = False, 3748 ) -> t.Optional[exp.Expression]: 3749 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 3750 comments = self._prev_comments 3751 if top: 3752 limit_paren = self._match(TokenType.L_PAREN) 3753 expression = self._parse_term() if limit_paren else self._parse_number() 3754 3755 if limit_paren: 3756 self._match_r_paren() 3757 else: 3758 expression = self._parse_term() 3759 3760 if self._match(TokenType.COMMA): 3761 offset = expression 3762 expression = self._parse_term() 3763 else: 3764 offset = None 3765 3766 limit_exp = self.expression( 3767 exp.Limit, 3768 this=this, 3769 expression=expression, 3770 offset=offset, 3771 comments=comments, 3772 expressions=self._parse_limit_by(), 3773 ) 3774 3775 return limit_exp 3776 3777 if self._match(TokenType.FETCH): 3778 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3779 direction = self._prev.text.upper() if direction else "FIRST" 3780 3781 count = self._parse_field(tokens=self.FETCH_TOKENS) 3782 percent = 
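# Illustrative usage sketch (added; not part of the original source). _parse_ordered
# records the direction and the (possibly dialect-inferred) NULLS ordering on the
# exp.Ordered node:
#
#     import sqlglot
#
#     ordered = sqlglot.parse_one("SELECT x FROM t ORDER BY x DESC NULLS LAST").find(
#         sqlglot.exp.Ordered
#     )
#     assert ordered.args["desc"] is True and not ordered.args.get("nulls_first")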
self._match(TokenType.PERCENT) 3783 3784 self._match_set((TokenType.ROW, TokenType.ROWS)) 3785 3786 only = self._match_text_seq("ONLY") 3787 with_ties = self._match_text_seq("WITH", "TIES") 3788 3789 if only and with_ties: 3790 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3791 3792 return self.expression( 3793 exp.Fetch, 3794 direction=direction, 3795 count=count, 3796 percent=percent, 3797 with_ties=with_ties, 3798 ) 3799 3800 return this 3801 3802 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3803 if not self._match(TokenType.OFFSET): 3804 return this 3805 3806 count = self._parse_term() 3807 self._match_set((TokenType.ROW, TokenType.ROWS)) 3808 3809 return self.expression( 3810 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3811 ) 3812 3813 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3814 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3815 3816 def _parse_locks(self) -> t.List[exp.Lock]: 3817 locks = [] 3818 while True: 3819 if self._match_text_seq("FOR", "UPDATE"): 3820 update = True 3821 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3822 "LOCK", "IN", "SHARE", "MODE" 3823 ): 3824 update = False 3825 else: 3826 break 3827 3828 expressions = None 3829 if self._match_text_seq("OF"): 3830 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3831 3832 wait: t.Optional[bool | exp.Expression] = None 3833 if self._match_text_seq("NOWAIT"): 3834 wait = True 3835 elif self._match_text_seq("WAIT"): 3836 wait = self._parse_primary() 3837 elif self._match_text_seq("SKIP", "LOCKED"): 3838 wait = False 3839 3840 locks.append( 3841 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3842 ) 3843 3844 return locks 3845 3846 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3847 while this and self._match_set(self.SET_OPERATIONS): 3848 token_type = self._prev.token_type 3849 3850 if token_type == TokenType.UNION: 3851 operation = exp.Union 3852 elif token_type == TokenType.EXCEPT: 3853 operation = exp.Except 3854 else: 3855 operation = exp.Intersect 3856 3857 comments = self._prev.comments 3858 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3859 by_name = self._match_text_seq("BY", "NAME") 3860 expression = self._parse_select(nested=True, parse_set_operation=False) 3861 3862 this = self.expression( 3863 operation, 3864 comments=comments, 3865 this=this, 3866 distinct=distinct, 3867 by_name=by_name, 3868 expression=expression, 3869 ) 3870 3871 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3872 expression = this.expression 3873 3874 if expression: 3875 for arg in self.UNION_MODIFIERS: 3876 expr = expression.args.get(arg) 3877 if expr: 3878 this.set(arg, expr.pop()) 3879 3880 return this 3881 3882 def _parse_expression(self) -> t.Optional[exp.Expression]: 3883 return self._parse_alias(self._parse_conjunction()) 3884 3885 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3886 this = self._parse_equality() 3887 3888 if self._match(TokenType.COLON_EQ): 3889 this = self.expression( 3890 exp.PropertyEQ, 3891 this=this, 3892 comments=self._prev_comments, 3893 expression=self._parse_conjunction(), 3894 ) 3895 3896 while self._match_set(self.CONJUNCTION): 3897 this = self.expression( 3898 self.CONJUNCTION[self._prev.token_type], 3899 this=this, 3900 comments=self._prev_comments, 3901 
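# Illustrative usage sketch (added; not part of the original source). The
# set-operation loop above left-folds UNION / EXCEPT / INTERSECT, recording
# DISTINCT vs ALL in the "distinct" arg:
#
#     import sqlglot
#
#     union = sqlglot.parse_one("SELECT 1 UNION ALL SELECT 2")
#     assert isinstance(union, sqlglot.exp.Union) and union.args["distinct"] is False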
expression=self._parse_equality(), 3902 ) 3903 return this 3904 3905 def _parse_equality(self) -> t.Optional[exp.Expression]: 3906 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3907 3908 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3909 return self._parse_tokens(self._parse_range, self.COMPARISON) 3910 3911 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3912 this = this or self._parse_bitwise() 3913 negate = self._match(TokenType.NOT) 3914 3915 if self._match_set(self.RANGE_PARSERS): 3916 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3917 if not expression: 3918 return this 3919 3920 this = expression 3921 elif self._match(TokenType.ISNULL): 3922 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3923 3924 # Postgres supports ISNULL and NOTNULL for conditions. 3925 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3926 if self._match(TokenType.NOTNULL): 3927 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3928 this = self.expression(exp.Not, this=this) 3929 3930 if negate: 3931 this = self.expression(exp.Not, this=this) 3932 3933 if self._match(TokenType.IS): 3934 this = self._parse_is(this) 3935 3936 return this 3937 3938 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3939 index = self._index - 1 3940 negate = self._match(TokenType.NOT) 3941 3942 if self._match_text_seq("DISTINCT", "FROM"): 3943 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3944 return self.expression(klass, this=this, expression=self._parse_bitwise()) 3945 3946 expression = self._parse_null() or self._parse_boolean() 3947 if not expression: 3948 self._retreat(index) 3949 return None 3950 3951 this = self.expression(exp.Is, this=this, expression=expression) 3952 return self.expression(exp.Not, this=this) if negate else this 3953 3954 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3955 unnest = self._parse_unnest(with_alias=False) 3956 if unnest: 3957 this = self.expression(exp.In, this=this, unnest=unnest) 3958 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 3959 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 3960 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3961 3962 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 3963 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 3964 else: 3965 this = self.expression(exp.In, this=this, expressions=expressions) 3966 3967 if matched_l_paren: 3968 self._match_r_paren(this) 3969 elif not self._match(TokenType.R_BRACKET, expression=this): 3970 self.raise_error("Expecting ]") 3971 else: 3972 this = self.expression(exp.In, this=this, field=self._parse_field()) 3973 3974 return this 3975 3976 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 3977 low = self._parse_bitwise() 3978 self._match(TokenType.AND) 3979 high = self._parse_bitwise() 3980 return self.expression(exp.Between, this=this, low=low, high=high) 3981 3982 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3983 if not self._match(TokenType.ESCAPE): 3984 return this 3985 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3986 3987 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]: 3988 index = self._index 3989 3990 if not self._match(TokenType.INTERVAL) and 
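# Illustrative usage sketch (added; not part of the original source). _parse_is maps
# IS [NOT] DISTINCT FROM onto the null-safe comparison nodes; note the inversion,
# since IS NOT DISTINCT FROM means "null-safe equal":
#
#     import sqlglot
#
#     node = sqlglot.parse_one("SELECT a IS NOT DISTINCT FROM b").find(
#         sqlglot.exp.NullSafeEQ
#     )
#     assert node is not None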
match_interval: 3991 return None 3992 3993 if self._match(TokenType.STRING, advance=False): 3994 this = self._parse_primary() 3995 else: 3996 this = self._parse_term() 3997 3998 if not this or ( 3999 isinstance(this, exp.Column) 4000 and not this.table 4001 and not this.this.quoted 4002 and this.name.upper() == "IS" 4003 ): 4004 self._retreat(index) 4005 return None 4006 4007 unit = self._parse_function() or ( 4008 not self._match(TokenType.ALIAS, advance=False) 4009 and self._parse_var(any_token=True, upper=True) 4010 ) 4011 4012 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4013 # each INTERVAL expression into this canonical form so it's easy to transpile 4014 if this and this.is_number: 4015 this = exp.Literal.string(this.name) 4016 elif this and this.is_string: 4017 parts = this.name.split() 4018 4019 if len(parts) == 2: 4020 if unit: 4021 # This is not actually a unit, it's something else (e.g. a "window side") 4022 unit = None 4023 self._retreat(self._index - 1) 4024 4025 this = exp.Literal.string(parts[0]) 4026 unit = self.expression(exp.Var, this=parts[1].upper()) 4027 4028 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4029 unit = self.expression( 4030 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4031 ) 4032 4033 return self.expression(exp.Interval, this=this, unit=unit) 4034 4035 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4036 this = self._parse_term() 4037 4038 while True: 4039 if self._match_set(self.BITWISE): 4040 this = self.expression( 4041 self.BITWISE[self._prev.token_type], 4042 this=this, 4043 expression=self._parse_term(), 4044 ) 4045 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4046 this = self.expression( 4047 exp.DPipe, 4048 this=this, 4049 expression=self._parse_term(), 4050 safe=not self.dialect.STRICT_STRING_CONCAT, 4051 ) 4052 elif self._match(TokenType.DQMARK): 4053 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 4054 elif self._match_pair(TokenType.LT, TokenType.LT): 4055 this = self.expression( 4056 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4057 ) 4058 elif self._match_pair(TokenType.GT, TokenType.GT): 4059 this = self.expression( 4060 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4061 ) 4062 else: 4063 break 4064 4065 return this 4066 4067 def _parse_term(self) -> t.Optional[exp.Expression]: 4068 return self._parse_tokens(self._parse_factor, self.TERM) 4069 4070 def _parse_factor(self) -> t.Optional[exp.Expression]: 4071 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4072 this = parse_method() 4073 4074 while self._match_set(self.FACTOR): 4075 this = self.expression( 4076 self.FACTOR[self._prev.token_type], 4077 this=this, 4078 comments=self._prev_comments, 4079 expression=parse_method(), 4080 ) 4081 if isinstance(this, exp.Div): 4082 this.args["typed"] = self.dialect.TYPED_DIVISION 4083 this.args["safe"] = self.dialect.SAFE_DIVISION 4084 4085 return this 4086 4087 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4088 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4089 4090 def _parse_unary(self) -> t.Optional[exp.Expression]: 4091 if self._match_set(self.UNARY_PARSERS): 4092 return self.UNARY_PARSERS[self._prev.token_type](self) 4093 return self._parse_at_time_zone(self._parse_type()) 4094 4095 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 4096 interval = parse_interval and 
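# Illustrative usage sketch (added; not part of the original source). Per the comment
# above, interval operands are canonicalized into a string literal plus a unit, so the
# numeric form is expected to round-trip as the quoted form:
#
#     import sqlglot
#
#     print(sqlglot.transpile("SELECT INTERVAL 5 DAY")[0])  # SELECT INTERVAL '5' DAY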
self._parse_interval() 4097 if interval: 4098 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4099 while True: 4100 index = self._index 4101 self._match(TokenType.PLUS) 4102 4103 if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4104 self._retreat(index) 4105 break 4106 4107 interval = self.expression( # type: ignore 4108 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4109 ) 4110 4111 return interval 4112 4113 index = self._index 4114 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4115 this = self._parse_column() 4116 4117 if data_type: 4118 if isinstance(this, exp.Literal): 4119 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4120 if parser: 4121 return parser(self, this, data_type) 4122 return self.expression(exp.Cast, this=this, to=data_type) 4123 if not data_type.expressions: 4124 self._retreat(index) 4125 return self._parse_column() 4126 return self._parse_column_ops(data_type) 4127 4128 return this and self._parse_column_ops(this) 4129 4130 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4131 this = self._parse_type() 4132 if not this: 4133 return None 4134 4135 if isinstance(this, exp.Column) and not this.table: 4136 this = exp.var(this.name.upper()) 4137 4138 return self.expression( 4139 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4140 ) 4141 4142 def _parse_types( 4143 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4144 ) -> t.Optional[exp.Expression]: 4145 index = self._index 4146 4147 this: t.Optional[exp.Expression] = None 4148 prefix = self._match_text_seq("SYSUDTLIB", ".") 4149 4150 if not self._match_set(self.TYPE_TOKENS): 4151 identifier = allow_identifiers and self._parse_id_var( 4152 any_token=False, tokens=(TokenType.VAR,) 4153 ) 4154 if identifier: 4155 tokens = self.dialect.tokenize(identifier.name) 4156 4157 if len(tokens) != 1: 4158 self.raise_error("Unexpected identifier", self._prev) 4159 4160 if tokens[0].token_type in self.TYPE_TOKENS: 4161 self._prev = tokens[0] 4162 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4163 type_name = identifier.name 4164 4165 while self._match(TokenType.DOT): 4166 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4167 4168 this = exp.DataType.build(type_name, udt=True) 4169 else: 4170 self._retreat(self._index - 1) 4171 return None 4172 else: 4173 return None 4174 4175 type_token = self._prev.token_type 4176 4177 if type_token == TokenType.PSEUDO_TYPE: 4178 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4179 4180 if type_token == TokenType.OBJECT_IDENTIFIER: 4181 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4182 4183 nested = type_token in self.NESTED_TYPE_TOKENS 4184 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4185 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4186 expressions = None 4187 maybe_func = False 4188 4189 if self._match(TokenType.L_PAREN): 4190 if is_struct: 4191 expressions = self._parse_csv(self._parse_struct_types) 4192 elif nested: 4193 expressions = self._parse_csv( 4194 lambda: self._parse_types( 4195 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4196 ) 4197 ) 4198 elif type_token in self.ENUM_TYPE_TOKENS: 4199 expressions = self._parse_csv(self._parse_equality) 4200 elif is_aggregate: 4201 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4202 any_token=False, 
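# Illustrative usage sketch (added; not part of the original source). _parse_types
# consumes parenthesized type parameters through _parse_type_size, yielding
# exp.DataTypeParam children on the resulting exp.DataType:
#
#     import sqlglot
#
#     cast = sqlglot.parse_one("SELECT CAST(x AS DECIMAL(10, 2))").find(sqlglot.exp.Cast)
#     assert cast.to.sql() == "DECIMAL(10, 2)"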
tokens=(TokenType.VAR,) 4203 ) 4204 if not func_or_ident or not self._match(TokenType.COMMA): 4205 return None 4206 expressions = self._parse_csv( 4207 lambda: self._parse_types( 4208 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4209 ) 4210 ) 4211 expressions.insert(0, func_or_ident) 4212 else: 4213 expressions = self._parse_csv(self._parse_type_size) 4214 4215 if not expressions or not self._match(TokenType.R_PAREN): 4216 self._retreat(index) 4217 return None 4218 4219 maybe_func = True 4220 4221 values: t.Optional[t.List[exp.Expression]] = None 4222 4223 if nested and self._match(TokenType.LT): 4224 if is_struct: 4225 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4226 else: 4227 expressions = self._parse_csv( 4228 lambda: self._parse_types( 4229 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4230 ) 4231 ) 4232 4233 if not self._match(TokenType.GT): 4234 self.raise_error("Expecting >") 4235 4236 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4237 values = self._parse_csv(self._parse_conjunction) 4238 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4239 4240 if type_token in self.TIMESTAMPS: 4241 if self._match_text_seq("WITH", "TIME", "ZONE"): 4242 maybe_func = False 4243 tz_type = ( 4244 exp.DataType.Type.TIMETZ 4245 if type_token in self.TIMES 4246 else exp.DataType.Type.TIMESTAMPTZ 4247 ) 4248 this = exp.DataType(this=tz_type, expressions=expressions) 4249 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4250 maybe_func = False 4251 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4252 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4253 maybe_func = False 4254 elif type_token == TokenType.INTERVAL: 4255 unit = self._parse_var(upper=True) 4256 if unit: 4257 if self._match_text_seq("TO"): 4258 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4259 4260 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4261 else: 4262 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4263 4264 if maybe_func and check_func: 4265 index2 = self._index 4266 peek = self._parse_string() 4267 4268 if not peek: 4269 self._retreat(index) 4270 return None 4271 4272 self._retreat(index2) 4273 4274 if not this: 4275 if self._match_text_seq("UNSIGNED"): 4276 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4277 if not unsigned_type_token: 4278 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4279 4280 type_token = unsigned_type_token or type_token 4281 4282 this = exp.DataType( 4283 this=exp.DataType.Type[type_token.value], 4284 expressions=expressions, 4285 nested=nested, 4286 values=values, 4287 prefix=prefix, 4288 ) 4289 elif expressions: 4290 this.set("expressions", expressions) 4291 4292 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 4293 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 4294 4295 if self.TYPE_CONVERTER and isinstance(this.this, exp.DataType.Type): 4296 converter = self.TYPE_CONVERTER.get(this.this) 4297 if converter: 4298 this = converter(t.cast(exp.DataType, this)) 4299 4300 return this 4301 4302 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4303 index = self._index 4304 this = self._parse_type(parse_interval=False) or self._parse_id_var() 4305 self._match(TokenType.COLON) 4306 column_def = 
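# Illustrative usage sketch (added; not part of the original source). The branches
# above fold modifiers into concrete types, e.g. WITH TIME ZONE becomes TIMESTAMPTZ,
# and trailing [] pairs wrap the type in nested ARRAY types:
#
#     import sqlglot
#
#     to = sqlglot.parse_one("SELECT CAST(x AS TIMESTAMP WITH TIME ZONE)").find(
#         sqlglot.exp.Cast
#     ).to
#     assert to.this == sqlglot.exp.DataType.Type.TIMESTAMPTZ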
self._parse_column_def(this) 4307 4308 if type_required and ( 4309 (isinstance(this, exp.Column) and this.this is column_def) or this is column_def 4310 ): 4311 self._retreat(index) 4312 return self._parse_types() 4313 4314 return column_def 4315 4316 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4317 if not self._match_text_seq("AT", "TIME", "ZONE"): 4318 return this 4319 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4320 4321 def _parse_column(self) -> t.Optional[exp.Expression]: 4322 this = self._parse_column_reference() 4323 return self._parse_column_ops(this) if this else self._parse_bracket(this) 4324 4325 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4326 this = self._parse_field() 4327 if ( 4328 not this 4329 and self._match(TokenType.VALUES, advance=False) 4330 and self.VALUES_FOLLOWED_BY_PAREN 4331 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4332 ): 4333 this = self._parse_id_var() 4334 4335 if isinstance(this, exp.Identifier): 4336 # We bubble up comments from the Identifier to the Column 4337 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4338 4339 return this 4340 4341 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4342 this = self._parse_bracket(this) 4343 4344 while self._match_set(self.COLUMN_OPERATORS): 4345 op_token = self._prev.token_type 4346 op = self.COLUMN_OPERATORS.get(op_token) 4347 4348 if op_token == TokenType.DCOLON: 4349 field = self._parse_types() 4350 if not field: 4351 self.raise_error("Expected type") 4352 elif op and self._curr: 4353 field = self._parse_column_reference() 4354 else: 4355 field = self._parse_field(any_token=True, anonymous_func=True) 4356 4357 if isinstance(field, exp.Func) and this: 4358 # bigquery allows function calls like x.y.count(...) 4359 # SAFE.SUBSTR(...) 
4360 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4361 this = exp.replace_tree( 4362 this, 4363 lambda n: ( 4364 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4365 if n.table 4366 else n.this 4367 ) 4368 if isinstance(n, exp.Column) 4369 else n, 4370 ) 4371 4372 if op: 4373 this = op(self, this, field) 4374 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4375 this = self.expression( 4376 exp.Column, 4377 this=field, 4378 table=this.this, 4379 db=this.args.get("table"), 4380 catalog=this.args.get("db"), 4381 ) 4382 else: 4383 this = self.expression(exp.Dot, this=this, expression=field) 4384 this = self._parse_bracket(this) 4385 return this 4386 4387 def _parse_primary(self) -> t.Optional[exp.Expression]: 4388 if self._match_set(self.PRIMARY_PARSERS): 4389 token_type = self._prev.token_type 4390 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4391 4392 if token_type == TokenType.STRING: 4393 expressions = [primary] 4394 while self._match(TokenType.STRING): 4395 expressions.append(exp.Literal.string(self._prev.text)) 4396 4397 if len(expressions) > 1: 4398 return self.expression(exp.Concat, expressions=expressions) 4399 4400 return primary 4401 4402 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4403 return exp.Literal.number(f"0.{self._prev.text}") 4404 4405 if self._match(TokenType.L_PAREN): 4406 comments = self._prev_comments 4407 query = self._parse_select() 4408 4409 if query: 4410 expressions = [query] 4411 else: 4412 expressions = self._parse_expressions() 4413 4414 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4415 4416 if not this and self._match(TokenType.R_PAREN, advance=False): 4417 this = self.expression(exp.Tuple) 4418 elif isinstance(this, exp.UNWRAPPED_QUERIES): 4419 this = self._parse_subquery(this=this, parse_alias=False) 4420 elif isinstance(this, exp.Subquery): 4421 this = self._parse_subquery( 4422 this=self._parse_set_operations(this), parse_alias=False 4423 ) 4424 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 4425 this = self.expression(exp.Tuple, expressions=expressions) 4426 else: 4427 this = self.expression(exp.Paren, this=this) 4428 4429 if this: 4430 this.add_comments(comments) 4431 4432 self._match_r_paren(expression=this) 4433 return this 4434 4435 return None 4436 4437 def _parse_field( 4438 self, 4439 any_token: bool = False, 4440 tokens: t.Optional[t.Collection[TokenType]] = None, 4441 anonymous_func: bool = False, 4442 ) -> t.Optional[exp.Expression]: 4443 if anonymous_func: 4444 field = ( 4445 self._parse_function(anonymous=anonymous_func, any_token=any_token) 4446 or self._parse_primary() 4447 ) 4448 else: 4449 field = self._parse_primary() or self._parse_function( 4450 anonymous=anonymous_func, any_token=any_token 4451 ) 4452 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 4453 4454 def _parse_function( 4455 self, 4456 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4457 anonymous: bool = False, 4458 optional_parens: bool = True, 4459 any_token: bool = False, 4460 ) -> t.Optional[exp.Expression]: 4461 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4462 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4463 fn_syntax = False 4464 if ( 4465 self._match(TokenType.L_BRACE, advance=False) 4466 and self._next 4467 and self._next.text.upper() == "FN" 4468 ): 4469 self._advance(2) 4470 fn_syntax = True 4471 4472 func 
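# Illustrative usage sketch (added; not part of the original source).
# _parse_column_ops turns the DCOLON operator into a cast, and _parse_primary
# normalizes a leading-dot numeric literal:
#
#     import sqlglot
#
#     cast = sqlglot.parse_one("SELECT x::INT", read="postgres").find(sqlglot.exp.Cast)
#     assert cast is not None and cast.to.this == sqlglot.exp.DataType.Type.INT
#     assert sqlglot.transpile("SELECT .5")[0] == "SELECT 0.5"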
= self._parse_function_call( 4473 functions=functions, 4474 anonymous=anonymous, 4475 optional_parens=optional_parens, 4476 any_token=any_token, 4477 ) 4478 4479 if fn_syntax: 4480 self._match(TokenType.R_BRACE) 4481 4482 return func 4483 4484 def _parse_function_call( 4485 self, 4486 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4487 anonymous: bool = False, 4488 optional_parens: bool = True, 4489 any_token: bool = False, 4490 ) -> t.Optional[exp.Expression]: 4491 if not self._curr: 4492 return None 4493 4494 comments = self._curr.comments 4495 token_type = self._curr.token_type 4496 this = self._curr.text 4497 upper = this.upper() 4498 4499 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4500 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4501 self._advance() 4502 return self._parse_window(parser(self)) 4503 4504 if not self._next or self._next.token_type != TokenType.L_PAREN: 4505 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4506 self._advance() 4507 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4508 4509 return None 4510 4511 if any_token: 4512 if token_type in self.RESERVED_TOKENS: 4513 return None 4514 elif token_type not in self.FUNC_TOKENS: 4515 return None 4516 4517 self._advance(2) 4518 4519 parser = self.FUNCTION_PARSERS.get(upper) 4520 if parser and not anonymous: 4521 this = parser(self) 4522 else: 4523 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4524 4525 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4526 this = self.expression(subquery_predicate, this=self._parse_select()) 4527 self._match_r_paren() 4528 return this 4529 4530 if functions is None: 4531 functions = self.FUNCTIONS 4532 4533 function = functions.get(upper) 4534 4535 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4536 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4537 4538 if alias: 4539 args = self._kv_to_prop_eq(args) 4540 4541 if function and not anonymous: 4542 if "dialect" in function.__code__.co_varnames: 4543 func = function(args, dialect=self.dialect) 4544 else: 4545 func = function(args) 4546 4547 func = self.validate_expression(func, args) 4548 if not self.dialect.NORMALIZE_FUNCTIONS: 4549 func.meta["name"] = this 4550 4551 this = func 4552 else: 4553 if token_type == TokenType.IDENTIFIER: 4554 this = exp.Identifier(this=this, quoted=True) 4555 this = self.expression(exp.Anonymous, this=this, expressions=args) 4556 4557 if isinstance(this, exp.Expression): 4558 this.add_comments(comments) 4559 4560 self._match_r_paren(this) 4561 return self._parse_window(this) 4562 4563 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4564 transformed = [] 4565 4566 for e in expressions: 4567 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4568 if isinstance(e, exp.Alias): 4569 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4570 4571 if not isinstance(e, exp.PropertyEQ): 4572 e = self.expression( 4573 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 4574 ) 4575 4576 if isinstance(e.this, exp.Column): 4577 e.this.replace(e.this.this) 4578 4579 transformed.append(e) 4580 4581 return transformed 4582 4583 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4584 return self._parse_column_def(self._parse_id_var()) 4585 4586 def _parse_user_defined_function( 4587 self, kind: t.Optional[TokenType] = None 4588 ) -> t.Optional[exp.Expression]: 4589 this = 
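# Illustrative usage sketch (added; not part of the original source). The {fn ...}
# escape handled above is dropped while the wrapped function is parsed normally:
#
#     import sqlglot
#
#     node = sqlglot.parse_one("SELECT {fn CONCAT('a', 'b')}", read="mysql").find(
#         sqlglot.exp.Concat
#     )
#     assert node is not None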
self._parse_id_var() 4590 4591 while self._match(TokenType.DOT): 4592 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4593 4594 if not self._match(TokenType.L_PAREN): 4595 return this 4596 4597 expressions = self._parse_csv(self._parse_function_parameter) 4598 self._match_r_paren() 4599 return self.expression( 4600 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4601 ) 4602 4603 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4604 literal = self._parse_primary() 4605 if literal: 4606 return self.expression(exp.Introducer, this=token.text, expression=literal) 4607 4608 return self.expression(exp.Identifier, this=token.text) 4609 4610 def _parse_session_parameter(self) -> exp.SessionParameter: 4611 kind = None 4612 this = self._parse_id_var() or self._parse_primary() 4613 4614 if this and self._match(TokenType.DOT): 4615 kind = this.name 4616 this = self._parse_var() or self._parse_primary() 4617 4618 return self.expression(exp.SessionParameter, this=this, kind=kind) 4619 4620 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4621 index = self._index 4622 4623 if self._match(TokenType.L_PAREN): 4624 expressions = t.cast( 4625 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 4626 ) 4627 4628 if not self._match(TokenType.R_PAREN): 4629 self._retreat(index) 4630 else: 4631 expressions = [self._parse_id_var()] 4632 4633 if self._match_set(self.LAMBDAS): 4634 return self.LAMBDAS[self._prev.token_type](self, expressions) 4635 4636 self._retreat(index) 4637 4638 this: t.Optional[exp.Expression] 4639 4640 if self._match(TokenType.DISTINCT): 4641 this = self.expression( 4642 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 4643 ) 4644 else: 4645 this = self._parse_select_or_expression(alias=alias) 4646 4647 return self._parse_limit( 4648 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4649 ) 4650 4651 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4652 index = self._index 4653 if not self._match(TokenType.L_PAREN): 4654 return this 4655 4656 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 4657 # expr can be of both types 4658 if self._match_set(self.SELECT_START_TOKENS): 4659 self._retreat(index) 4660 return this 4661 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4662 self._match_r_paren() 4663 return self.expression(exp.Schema, this=this, expressions=args) 4664 4665 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4666 return self._parse_column_def(self._parse_field(any_token=True)) 4667 4668 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4669 # column defs are not really columns, they're identifiers 4670 if isinstance(this, exp.Column): 4671 this = this.this 4672 4673 kind = self._parse_types(schema=True) 4674 4675 if self._match_text_seq("FOR", "ORDINALITY"): 4676 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4677 4678 constraints: t.List[exp.Expression] = [] 4679 4680 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 4681 ("ALIAS", "MATERIALIZED") 4682 ): 4683 persisted = self._prev.text.upper() == "MATERIALIZED" 4684 constraints.append( 4685 self.expression( 4686 exp.ComputedColumnConstraint, 4687 this=self._parse_conjunction(), 4688 persisted=persisted or self._match_text_seq("PERSISTED"), 4689 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4690 ) 4691 ) 4692 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4693 self._match(TokenType.ALIAS) 4694 constraints.append( 4695 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4696 ) 4697 4698 while True: 4699 constraint = self._parse_column_constraint() 4700 if not constraint: 4701 break 4702 constraints.append(constraint) 4703 4704 if not kind and not constraints: 4705 return this 4706 4707 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4708 4709 def _parse_auto_increment( 4710 self, 4711 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4712 start = None 4713 increment = None 4714 4715 if self._match(TokenType.L_PAREN, advance=False): 4716 args = self._parse_wrapped_csv(self._parse_bitwise) 4717 start = seq_get(args, 0) 4718 increment = seq_get(args, 1) 4719 elif self._match_text_seq("START"): 4720 start = self._parse_bitwise() 4721 self._match_text_seq("INCREMENT") 4722 increment = self._parse_bitwise() 4723 4724 if start and increment: 4725 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4726 4727 return exp.AutoIncrementColumnConstraint() 4728 4729 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4730 if not self._match_text_seq("REFRESH"): 4731 self._retreat(self._index - 1) 4732 return None 4733 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4734 4735 def _parse_compress(self) -> exp.CompressColumnConstraint: 4736 if self._match(TokenType.L_PAREN, advance=False): 4737 return self.expression( 4738 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4739 ) 4740 4741 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4742 4743 def _parse_generated_as_identity( 4744 self, 4745 ) -> ( 4746 exp.GeneratedAsIdentityColumnConstraint 4747 | exp.ComputedColumnConstraint 4748 | exp.GeneratedAsRowColumnConstraint 4749 ): 4750 if self._match_text_seq("BY", "DEFAULT"): 4751 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4752 this = self.expression( 4753 
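# Illustrative usage sketch (added; not part of the original source).
# _parse_column_def attaches the parsed type under "kind" and any trailing
# constraints to the resulting exp.ColumnDef:
#
#     import sqlglot
#
#     col = sqlglot.parse_one("CREATE TABLE t (a INT NOT NULL)").find(sqlglot.exp.ColumnDef)
#     assert col.args["kind"].this == sqlglot.exp.DataType.Type.INT
#     assert col.args["constraints"]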
exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4754 ) 4755 else: 4756 self._match_text_seq("ALWAYS") 4757 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4758 4759 self._match(TokenType.ALIAS) 4760 4761 if self._match_text_seq("ROW"): 4762 start = self._match_text_seq("START") 4763 if not start: 4764 self._match(TokenType.END) 4765 hidden = self._match_text_seq("HIDDEN") 4766 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4767 4768 identity = self._match_text_seq("IDENTITY") 4769 4770 if self._match(TokenType.L_PAREN): 4771 if self._match(TokenType.START_WITH): 4772 this.set("start", self._parse_bitwise()) 4773 if self._match_text_seq("INCREMENT", "BY"): 4774 this.set("increment", self._parse_bitwise()) 4775 if self._match_text_seq("MINVALUE"): 4776 this.set("minvalue", self._parse_bitwise()) 4777 if self._match_text_seq("MAXVALUE"): 4778 this.set("maxvalue", self._parse_bitwise()) 4779 4780 if self._match_text_seq("CYCLE"): 4781 this.set("cycle", True) 4782 elif self._match_text_seq("NO", "CYCLE"): 4783 this.set("cycle", False) 4784 4785 if not identity: 4786 this.set("expression", self._parse_range()) 4787 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4788 args = self._parse_csv(self._parse_bitwise) 4789 this.set("start", seq_get(args, 0)) 4790 this.set("increment", seq_get(args, 1)) 4791 4792 self._match_r_paren() 4793 4794 return this 4795 4796 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4797 self._match_text_seq("LENGTH") 4798 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4799 4800 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 4801 if self._match_text_seq("NULL"): 4802 return self.expression(exp.NotNullColumnConstraint) 4803 if self._match_text_seq("CASESPECIFIC"): 4804 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4805 if self._match_text_seq("FOR", "REPLICATION"): 4806 return self.expression(exp.NotForReplicationColumnConstraint) 4807 return None 4808 4809 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4810 if self._match(TokenType.CONSTRAINT): 4811 this = self._parse_id_var() 4812 else: 4813 this = None 4814 4815 if self._match_texts(self.CONSTRAINT_PARSERS): 4816 return self.expression( 4817 exp.ColumnConstraint, 4818 this=this, 4819 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4820 ) 4821 4822 return this 4823 4824 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4825 if not self._match(TokenType.CONSTRAINT): 4826 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4827 4828 return self.expression( 4829 exp.Constraint, 4830 this=self._parse_id_var(), 4831 expressions=self._parse_unnamed_constraints(), 4832 ) 4833 4834 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 4835 constraints = [] 4836 while True: 4837 constraint = self._parse_unnamed_constraint() or self._parse_function() 4838 if not constraint: 4839 break 4840 constraints.append(constraint) 4841 4842 return constraints 4843 4844 def _parse_unnamed_constraint( 4845 self, constraints: t.Optional[t.Collection[str]] = None 4846 ) -> t.Optional[exp.Expression]: 4847 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4848 constraints or self.CONSTRAINT_PARSERS 4849 ): 4850 return None 4851 4852 constraint = self._prev.text.upper() 4853 if constraint not in self.CONSTRAINT_PARSERS: 4854 
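# Illustrative usage sketch (added; not part of the original source). The identity
# parser above records START WITH / INCREMENT BY (and MINVALUE, MAXVALUE, CYCLE) as
# args on the constraint node:
#
#     import sqlglot
#
#     ident = sqlglot.parse_one(
#         "CREATE TABLE t (id INT GENERATED ALWAYS AS IDENTITY (START WITH 1 INCREMENT BY 2))",
#         read="postgres",
#     ).find(sqlglot.exp.GeneratedAsIdentityColumnConstraint)
#     assert ident.args["start"].sql() == "1" and ident.args["increment"].sql() == "2"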
self.raise_error(f"No parser found for schema constraint {constraint}.") 4855 4856 return self.CONSTRAINT_PARSERS[constraint](self) 4857 4858 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4859 self._match_text_seq("KEY") 4860 return self.expression( 4861 exp.UniqueColumnConstraint, 4862 this=self._parse_schema(self._parse_id_var(any_token=False)), 4863 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4864 on_conflict=self._parse_on_conflict(), 4865 ) 4866 4867 def _parse_key_constraint_options(self) -> t.List[str]: 4868 options = [] 4869 while True: 4870 if not self._curr: 4871 break 4872 4873 if self._match(TokenType.ON): 4874 action = None 4875 on = self._advance_any() and self._prev.text 4876 4877 if self._match_text_seq("NO", "ACTION"): 4878 action = "NO ACTION" 4879 elif self._match_text_seq("CASCADE"): 4880 action = "CASCADE" 4881 elif self._match_text_seq("RESTRICT"): 4882 action = "RESTRICT" 4883 elif self._match_pair(TokenType.SET, TokenType.NULL): 4884 action = "SET NULL" 4885 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 4886 action = "SET DEFAULT" 4887 else: 4888 self.raise_error("Invalid key constraint") 4889 4890 options.append(f"ON {on} {action}") 4891 elif self._match_text_seq("NOT", "ENFORCED"): 4892 options.append("NOT ENFORCED") 4893 elif self._match_text_seq("DEFERRABLE"): 4894 options.append("DEFERRABLE") 4895 elif self._match_text_seq("INITIALLY", "DEFERRED"): 4896 options.append("INITIALLY DEFERRED") 4897 elif self._match_text_seq("NORELY"): 4898 options.append("NORELY") 4899 elif self._match_text_seq("MATCH", "FULL"): 4900 options.append("MATCH FULL") 4901 else: 4902 break 4903 4904 return options 4905 4906 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 4907 if match and not self._match(TokenType.REFERENCES): 4908 return None 4909 4910 expressions = None 4911 this = self._parse_table(schema=True) 4912 options = self._parse_key_constraint_options() 4913 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 4914 4915 def _parse_foreign_key(self) -> exp.ForeignKey: 4916 expressions = self._parse_wrapped_id_vars() 4917 reference = self._parse_references() 4918 options = {} 4919 4920 while self._match(TokenType.ON): 4921 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 4922 self.raise_error("Expected DELETE or UPDATE") 4923 4924 kind = self._prev.text.lower() 4925 4926 if self._match_text_seq("NO", "ACTION"): 4927 action = "NO ACTION" 4928 elif self._match(TokenType.SET): 4929 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4930 action = "SET " + self._prev.text.upper() 4931 else: 4932 self._advance() 4933 action = self._prev.text.upper() 4934 4935 options[kind] = action 4936 4937 return self.expression( 4938 exp.ForeignKey, 4939 expressions=expressions, 4940 reference=reference, 4941 **options, # type: ignore 4942 ) 4943 4944 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4945 return self._parse_field() 4946 4947 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 4948 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 4949 self._retreat(self._index - 1) 4950 return None 4951 4952 id_vars = self._parse_wrapped_id_vars() 4953 return self.expression( 4954 exp.PeriodForSystemTimeConstraint, 4955 this=seq_get(id_vars, 0), 4956 expression=seq_get(id_vars, 1), 4957 ) 4958 4959 def _parse_primary_key( 4960 self, wrapped_optional: bool = False, in_props: bool = False 4961 ) -> 
exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4962 desc = ( 4963 self._match_set((TokenType.ASC, TokenType.DESC)) 4964 and self._prev.token_type == TokenType.DESC 4965 ) 4966 4967 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4968 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4969 4970 expressions = self._parse_wrapped_csv( 4971 self._parse_primary_key_part, optional=wrapped_optional 4972 ) 4973 options = self._parse_key_constraint_options() 4974 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4975 4976 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 4977 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 4978 4979 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4980 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4981 return this 4982 4983 bracket_kind = self._prev.token_type 4984 expressions = self._parse_csv( 4985 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 4986 ) 4987 4988 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 4989 self.raise_error("Expected ]") 4990 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 4991 self.raise_error("Expected }") 4992 4993 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4994 if bracket_kind == TokenType.L_BRACE: 4995 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 4996 elif not this or this.name.upper() == "ARRAY": 4997 this = self.expression(exp.Array, expressions=expressions) 4998 else: 4999 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5000 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5001 5002 self._add_comments(this) 5003 return self._parse_bracket(this) 5004 5005 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5006 if self._match(TokenType.COLON): 5007 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 5008 return this 5009 5010 def _parse_case(self) -> t.Optional[exp.Expression]: 5011 ifs = [] 5012 default = None 5013 5014 comments = self._prev_comments 5015 expression = self._parse_conjunction() 5016 5017 while self._match(TokenType.WHEN): 5018 this = self._parse_conjunction() 5019 self._match(TokenType.THEN) 5020 then = self._parse_conjunction() 5021 ifs.append(self.expression(exp.If, this=this, true=then)) 5022 5023 if self._match(TokenType.ELSE): 5024 default = self._parse_conjunction() 5025 5026 if not self._match(TokenType.END): 5027 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5028 default = exp.column("interval") 5029 else: 5030 self.raise_error("Expected END after CASE", self._prev) 5031 5032 return self.expression( 5033 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5034 ) 5035 5036 def _parse_if(self) -> t.Optional[exp.Expression]: 5037 if self._match(TokenType.L_PAREN): 5038 args = self._parse_csv(self._parse_conjunction) 5039 this = self.validate_expression(exp.If.from_arg_list(args), args) 5040 self._match_r_paren() 5041 else: 5042 index = self._index - 1 5043 5044 if self.NO_PAREN_IF_COMMANDS and index == 0: 5045 return self._parse_as_command(self._prev) 5046 5047 condition = self._parse_conjunction() 5048 5049 if not condition: 5050 self._retreat(index) 5051 return None 5052 
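# Illustrative usage sketch (added; not part of the original source). _parse_case
# collects the WHEN/THEN pairs as exp.If nodes under "ifs", with an optional ELSE
# under "default":
#
#     import sqlglot
#
#     case = sqlglot.parse_one("SELECT CASE WHEN a = 1 THEN 'x' ELSE 'y' END").find(
#         sqlglot.exp.Case
#     )
#     assert len(case.args["ifs"]) == 1 and case.args["default"].sql() == "'y'"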
5053 self._match(TokenType.THEN) 5054 true = self._parse_conjunction() 5055 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 5056 self._match(TokenType.END) 5057 this = self.expression(exp.If, this=condition, true=true, false=false) 5058 5059 return this 5060 5061 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5062 if not self._match_text_seq("VALUE", "FOR"): 5063 self._retreat(self._index - 1) 5064 return None 5065 5066 return self.expression( 5067 exp.NextValueFor, 5068 this=self._parse_column(), 5069 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5070 ) 5071 5072 def _parse_extract(self) -> exp.Extract: 5073 this = self._parse_function() or self._parse_var() or self._parse_type() 5074 5075 if self._match(TokenType.FROM): 5076 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5077 5078 if not self._match(TokenType.COMMA): 5079 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5080 5081 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5082 5083 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5084 this = self._parse_conjunction() 5085 5086 if not self._match(TokenType.ALIAS): 5087 if self._match(TokenType.COMMA): 5088 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5089 5090 self.raise_error("Expected AS after CAST") 5091 5092 fmt = None 5093 to = self._parse_types() 5094 5095 if self._match(TokenType.FORMAT): 5096 fmt_string = self._parse_string() 5097 fmt = self._parse_at_time_zone(fmt_string) 5098 5099 if not to: 5100 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5101 if to.this in exp.DataType.TEMPORAL_TYPES: 5102 this = self.expression( 5103 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5104 this=this, 5105 format=exp.Literal.string( 5106 format_time( 5107 fmt_string.this if fmt_string else "", 5108 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5109 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5110 ) 5111 ), 5112 ) 5113 5114 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5115 this.set("zone", fmt.args["zone"]) 5116 return this 5117 elif not to: 5118 self.raise_error("Expected TYPE after CAST") 5119 elif isinstance(to, exp.Identifier): 5120 to = exp.DataType.build(to.name, udt=True) 5121 elif to.this == exp.DataType.Type.CHAR: 5122 if self._match(TokenType.CHARACTER_SET): 5123 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5124 5125 return self.expression( 5126 exp.Cast if strict else exp.TryCast, 5127 this=this, 5128 to=to, 5129 format=fmt, 5130 safe=safe, 5131 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5132 ) 5133 5134 def _parse_string_agg(self) -> exp.Expression: 5135 if self._match(TokenType.DISTINCT): 5136 args: t.List[t.Optional[exp.Expression]] = [ 5137 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 5138 ] 5139 if self._match(TokenType.COMMA): 5140 args.extend(self._parse_csv(self._parse_conjunction)) 5141 else: 5142 args = self._parse_csv(self._parse_conjunction) # type: ignore 5143 5144 index = self._index 5145 if not self._match(TokenType.R_PAREN) and args: 5146 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5147 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... 
]] [LIMIT n]) 5148 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5149 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5150 5151 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5152 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5153 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5154 if not self._match_text_seq("WITHIN", "GROUP"): 5155 self._retreat(index) 5156 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5157 5158 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5159 order = self._parse_order(this=seq_get(args, 0)) 5160 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5161 5162 def _parse_convert( 5163 self, strict: bool, safe: t.Optional[bool] = None 5164 ) -> t.Optional[exp.Expression]: 5165 this = self._parse_bitwise() 5166 5167 if self._match(TokenType.USING): 5168 to: t.Optional[exp.Expression] = self.expression( 5169 exp.CharacterSet, this=self._parse_var() 5170 ) 5171 elif self._match(TokenType.COMMA): 5172 to = self._parse_types() 5173 else: 5174 to = None 5175 5176 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5177 5178 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5179 """ 5180 There are generally two variants of the DECODE function: 5181 5182 - DECODE(bin, charset) 5183 - DECODE(expression, search, result [, search, result] ... [, default]) 5184 5185 The second variant will always be parsed into a CASE expression. Note that NULL 5186 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5187 instead of relying on pattern matching. 
5188 """ 5189 args = self._parse_csv(self._parse_conjunction) 5190 5191 if len(args) < 3: 5192 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5193 5194 expression, *expressions = args 5195 if not expression: 5196 return None 5197 5198 ifs = [] 5199 for search, result in zip(expressions[::2], expressions[1::2]): 5200 if not search or not result: 5201 return None 5202 5203 if isinstance(search, exp.Literal): 5204 ifs.append( 5205 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5206 ) 5207 elif isinstance(search, exp.Null): 5208 ifs.append( 5209 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5210 ) 5211 else: 5212 cond = exp.or_( 5213 exp.EQ(this=expression.copy(), expression=search), 5214 exp.and_( 5215 exp.Is(this=expression.copy(), expression=exp.Null()), 5216 exp.Is(this=search.copy(), expression=exp.Null()), 5217 copy=False, 5218 ), 5219 copy=False, 5220 ) 5221 ifs.append(exp.If(this=cond, true=result)) 5222 5223 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5224 5225 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5226 self._match_text_seq("KEY") 5227 key = self._parse_column() 5228 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5229 self._match_text_seq("VALUE") 5230 value = self._parse_bitwise() 5231 5232 if not key and not value: 5233 return None 5234 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5235 5236 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5237 if not this or not self._match_text_seq("FORMAT", "JSON"): 5238 return this 5239 5240 return self.expression(exp.FormatJson, this=this) 5241 5242 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5243 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5244 for value in values: 5245 if self._match_text_seq(value, "ON", on): 5246 return f"{value} ON {on}" 5247 5248 return None 5249 5250 @t.overload 5251 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5252 5253 @t.overload 5254 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
5255 5256 def _parse_json_object(self, agg=False): 5257 star = self._parse_star() 5258 expressions = ( 5259 [star] 5260 if star 5261 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5262 ) 5263 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5264 5265 unique_keys = None 5266 if self._match_text_seq("WITH", "UNIQUE"): 5267 unique_keys = True 5268 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5269 unique_keys = False 5270 5271 self._match_text_seq("KEYS") 5272 5273 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5274 self._parse_type() 5275 ) 5276 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5277 5278 return self.expression( 5279 exp.JSONObjectAgg if agg else exp.JSONObject, 5280 expressions=expressions, 5281 null_handling=null_handling, 5282 unique_keys=unique_keys, 5283 return_type=return_type, 5284 encoding=encoding, 5285 ) 5286 5287 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5288 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5289 if not self._match_text_seq("NESTED"): 5290 this = self._parse_id_var() 5291 kind = self._parse_types(allow_identifiers=False) 5292 nested = None 5293 else: 5294 this = None 5295 kind = None 5296 nested = True 5297 5298 path = self._match_text_seq("PATH") and self._parse_string() 5299 nested_schema = nested and self._parse_json_schema() 5300 5301 return self.expression( 5302 exp.JSONColumnDef, 5303 this=this, 5304 kind=kind, 5305 path=path, 5306 nested_schema=nested_schema, 5307 ) 5308 5309 def _parse_json_schema(self) -> exp.JSONSchema: 5310 self._match_text_seq("COLUMNS") 5311 return self.expression( 5312 exp.JSONSchema, 5313 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5314 ) 5315 5316 def _parse_json_table(self) -> exp.JSONTable: 5317 this = self._parse_format_json(self._parse_bitwise()) 5318 path = self._match(TokenType.COMMA) and self._parse_string() 5319 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5320 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5321 schema = self._parse_json_schema() 5322 5323 return exp.JSONTable( 5324 this=this, 5325 schema=schema, 5326 path=path, 5327 error_handling=error_handling, 5328 empty_handling=empty_handling, 5329 ) 5330 5331 def _parse_match_against(self) -> exp.MatchAgainst: 5332 expressions = self._parse_csv(self._parse_column) 5333 5334 self._match_text_seq(")", "AGAINST", "(") 5335 5336 this = self._parse_string() 5337 5338 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5339 modifier = "IN NATURAL LANGUAGE MODE" 5340 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5341 modifier = f"{modifier} WITH QUERY EXPANSION" 5342 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5343 modifier = "IN BOOLEAN MODE" 5344 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5345 modifier = "WITH QUERY EXPANSION" 5346 else: 5347 modifier = None 5348 5349 return self.expression( 5350 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5351 ) 5352 5353 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5354 def _parse_open_json(self) -> exp.OpenJSON: 5355 this = self._parse_bitwise() 5356 path = self._match(TokenType.COMMA) and self._parse_string() 5357 5358 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5359 this = self._parse_field(any_token=True) 5360 kind = self._parse_types() 5361 path = 
self._parse_string() 5362 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5363 5364 return self.expression( 5365 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5366 ) 5367 5368 expressions = None 5369 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5370 self._match_l_paren() 5371 expressions = self._parse_csv(_parse_open_json_column_def) 5372 5373 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5374 5375 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5376 args = self._parse_csv(self._parse_bitwise) 5377 5378 if self._match(TokenType.IN): 5379 return self.expression( 5380 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5381 ) 5382 5383 if haystack_first: 5384 haystack = seq_get(args, 0) 5385 needle = seq_get(args, 1) 5386 else: 5387 needle = seq_get(args, 0) 5388 haystack = seq_get(args, 1) 5389 5390 return self.expression( 5391 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5392 ) 5393 5394 def _parse_predict(self) -> exp.Predict: 5395 self._match_text_seq("MODEL") 5396 this = self._parse_table() 5397 5398 self._match(TokenType.COMMA) 5399 self._match_text_seq("TABLE") 5400 5401 return self.expression( 5402 exp.Predict, 5403 this=this, 5404 expression=self._parse_table(), 5405 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5406 ) 5407 5408 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5409 args = self._parse_csv(self._parse_table) 5410 return exp.JoinHint(this=func_name.upper(), expressions=args) 5411 5412 def _parse_substring(self) -> exp.Substring: 5413 # Postgres supports the form: substring(string [from int] [for int]) 5414 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5415 5416 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5417 5418 if self._match(TokenType.FROM): 5419 args.append(self._parse_bitwise()) 5420 if self._match(TokenType.FOR): 5421 if len(args) == 1: 5422 args.append(exp.Literal.number(1)) 5423 args.append(self._parse_bitwise()) 5424 5425 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5426 5427 def _parse_trim(self) -> exp.Trim: 5428 # https://www.w3resource.com/sql/character-functions/trim.php 5429 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5430 5431 position = None 5432 collation = None 5433 expression = None 5434 5435 if self._match_texts(self.TRIM_TYPES): 5436 position = self._prev.text.upper() 5437 5438 this = self._parse_bitwise() 5439 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5440 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5441 expression = self._parse_bitwise() 5442 5443 if invert_order: 5444 this, expression = expression, this 5445 5446 if self._match(TokenType.COLLATE): 5447 collation = self._parse_bitwise() 5448 5449 return self.expression( 5450 exp.Trim, this=this, position=position, expression=expression, collation=collation 5451 ) 5452 5453 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5454 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5455 5456 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5457 return self._parse_window(self._parse_id_var(), alias=True) 5458 5459 def _parse_respect_or_ignore_nulls( 5460 self, this: t.Optional[exp.Expression] 5461 ) -> t.Optional[exp.Expression]: 5462 if self._match_text_seq("IGNORE", "NULLS"): 
5463 return self.expression(exp.IgnoreNulls, this=this) 5464 if self._match_text_seq("RESPECT", "NULLS"): 5465 return self.expression(exp.RespectNulls, this=this) 5466 return this 5467 5468 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5469 if self._match(TokenType.HAVING): 5470 self._match_texts(("MAX", "MIN")) 5471 max = self._prev.text.upper() != "MIN" 5472 return self.expression( 5473 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5474 ) 5475 5476 return this 5477 5478 def _parse_window( 5479 self, this: t.Optional[exp.Expression], alias: bool = False 5480 ) -> t.Optional[exp.Expression]: 5481 func = this 5482 comments = func.comments if isinstance(func, exp.Expression) else None 5483 5484 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5485 self._match(TokenType.WHERE) 5486 this = self.expression( 5487 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5488 ) 5489 self._match_r_paren() 5490 5491 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5492 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5493 if self._match_text_seq("WITHIN", "GROUP"): 5494 order = self._parse_wrapped(self._parse_order) 5495 this = self.expression(exp.WithinGroup, this=this, expression=order) 5496 5497 # The SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] before OVER. 5498 # Some dialects choose to implement it and some do not. 5499 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5500 5501 # There is some code above in _parse_lambda that handles 5502 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5503 5504 # The code below handles 5505 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5506 5507 # Oracle allows both formats 5508 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5509 # and Snowflake chose to do the same for familiarity: 5510 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5511 if isinstance(this, exp.AggFunc): 5512 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5513 5514 if ignore_respect and ignore_respect is not this: 5515 ignore_respect.replace(ignore_respect.this) 5516 this = self.expression(ignore_respect.__class__, this=this) 5517 5518 this = self._parse_respect_or_ignore_nulls(this) 5519 5520 # BigQuery allows selecting from a named window: WINDOW x AS (PARTITION BY ...)
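        # (illustrative) _parse_named_window enters this branch with alias=True, e.g. for
        # SELECT SUM(x) OVER w FROM t WINDOW w AS (PARTITION BY y)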
5521 if alias: 5522 over = None 5523 self._match(TokenType.ALIAS) 5524 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5525 return this 5526 else: 5527 over = self._prev.text.upper() 5528 5529 if comments and isinstance(func, exp.Expression): 5530 func.pop_comments() 5531 5532 if not self._match(TokenType.L_PAREN): 5533 return self.expression( 5534 exp.Window, 5535 comments=comments, 5536 this=this, 5537 alias=self._parse_id_var(False), 5538 over=over, 5539 ) 5540 5541 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5542 5543 first = self._match(TokenType.FIRST) 5544 if self._match_text_seq("LAST"): 5545 first = False 5546 5547 partition, order = self._parse_partition_and_order() 5548 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5549 5550 if kind: 5551 self._match(TokenType.BETWEEN) 5552 start = self._parse_window_spec() 5553 self._match(TokenType.AND) 5554 end = self._parse_window_spec() 5555 5556 spec = self.expression( 5557 exp.WindowSpec, 5558 kind=kind, 5559 start=start["value"], 5560 start_side=start["side"], 5561 end=end["value"], 5562 end_side=end["side"], 5563 ) 5564 else: 5565 spec = None 5566 5567 self._match_r_paren() 5568 5569 window = self.expression( 5570 exp.Window, 5571 comments=comments, 5572 this=this, 5573 partition_by=partition, 5574 order=order, 5575 spec=spec, 5576 alias=window_alias, 5577 over=over, 5578 first=first, 5579 ) 5580 5581 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5582 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5583 return self._parse_window(window, alias=alias) 5584 5585 return window 5586 5587 def _parse_partition_and_order( 5588 self, 5589 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5590 return self._parse_partition_by(), self._parse_order() 5591 5592 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5593 self._match(TokenType.BETWEEN) 5594 5595 return { 5596 "value": ( 5597 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5598 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5599 or self._parse_bitwise() 5600 ), 5601 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5602 } 5603 5604 def _parse_alias( 5605 self, this: t.Optional[exp.Expression], explicit: bool = False 5606 ) -> t.Optional[exp.Expression]: 5607 any_token = self._match(TokenType.ALIAS) 5608 comments = self._prev_comments or [] 5609 5610 if explicit and not any_token: 5611 return this 5612 5613 if self._match(TokenType.L_PAREN): 5614 aliases = self.expression( 5615 exp.Aliases, 5616 comments=comments, 5617 this=this, 5618 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5619 ) 5620 self._match_r_paren(aliases) 5621 return aliases 5622 5623 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 5624 self.STRING_ALIASES and self._parse_string_as_identifier() 5625 ) 5626 5627 if alias: 5628 comments.extend(alias.pop_comments()) 5629 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5630 column = this.this 5631 5632 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5633 if not this.comments and column and column.comments: 5634 this.comments = column.pop_comments() 5635 5636 return this 5637 5638 def _parse_id_var( 5639 self, 5640 any_token: bool = True, 5641 tokens: t.Optional[t.Collection[TokenType]] = None, 5642 ) -> t.Optional[exp.Expression]: 5643 expression = self._parse_identifier() 5644 if 
not expression and ( 5645 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 5646 ): 5647 quoted = self._prev.token_type == TokenType.STRING 5648 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 5649 5650 return expression 5651 5652 def _parse_string(self) -> t.Optional[exp.Expression]: 5653 if self._match_set(self.STRING_PARSERS): 5654 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5655 return self._parse_placeholder() 5656 5657 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5658 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5659 5660 def _parse_number(self) -> t.Optional[exp.Expression]: 5661 if self._match_set(self.NUMERIC_PARSERS): 5662 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5663 return self._parse_placeholder() 5664 5665 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5666 if self._match(TokenType.IDENTIFIER): 5667 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5668 return self._parse_placeholder() 5669 5670 def _parse_var( 5671 self, 5672 any_token: bool = False, 5673 tokens: t.Optional[t.Collection[TokenType]] = None, 5674 upper: bool = False, 5675 ) -> t.Optional[exp.Expression]: 5676 if ( 5677 (any_token and self._advance_any()) 5678 or self._match(TokenType.VAR) 5679 or (self._match_set(tokens) if tokens else False) 5680 ): 5681 return self.expression( 5682 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5683 ) 5684 return self._parse_placeholder() 5685 5686 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5687 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5688 self._advance() 5689 return self._prev 5690 return None 5691 5692 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5693 return self._parse_var() or self._parse_string() 5694 5695 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5696 return self._parse_primary() or self._parse_var(any_token=True) 5697 5698 def _parse_null(self) -> t.Optional[exp.Expression]: 5699 if self._match_set(self.NULL_TOKENS): 5700 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5701 return self._parse_placeholder() 5702 5703 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5704 if self._match(TokenType.TRUE): 5705 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5706 if self._match(TokenType.FALSE): 5707 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5708 return self._parse_placeholder() 5709 5710 def _parse_star(self) -> t.Optional[exp.Expression]: 5711 if self._match(TokenType.STAR): 5712 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5713 return self._parse_placeholder() 5714 5715 def _parse_parameter(self) -> exp.Parameter: 5716 this = self._parse_identifier() or self._parse_primary_or_var() 5717 return self.expression(exp.Parameter, this=this) 5718 5719 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5720 if self._match_set(self.PLACEHOLDER_PARSERS): 5721 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5722 if placeholder: 5723 return placeholder 5724 self._advance(-1) 5725 return None 5726 5727 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 5728 if not self._match_texts(keywords): 5729 return None 5730 if self._match(TokenType.L_PAREN, advance=False): 5731 return 
self._parse_wrapped_csv(self._parse_expression) 5732 5733 expression = self._parse_expression() 5734 return [expression] if expression else None 5735 5736 def _parse_csv( 5737 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5738 ) -> t.List[exp.Expression]: 5739 parse_result = parse_method() 5740 items = [parse_result] if parse_result is not None else [] 5741 5742 while self._match(sep): 5743 self._add_comments(parse_result) 5744 parse_result = parse_method() 5745 if parse_result is not None: 5746 items.append(parse_result) 5747 5748 return items 5749 5750 def _parse_tokens( 5751 self, parse_method: t.Callable, expressions: t.Dict 5752 ) -> t.Optional[exp.Expression]: 5753 this = parse_method() 5754 5755 while self._match_set(expressions): 5756 this = self.expression( 5757 expressions[self._prev.token_type], 5758 this=this, 5759 comments=self._prev_comments, 5760 expression=parse_method(), 5761 ) 5762 5763 return this 5764 5765 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5766 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5767 5768 def _parse_wrapped_csv( 5769 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5770 ) -> t.List[exp.Expression]: 5771 return self._parse_wrapped( 5772 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5773 ) 5774 5775 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5776 wrapped = self._match(TokenType.L_PAREN) 5777 if not wrapped and not optional: 5778 self.raise_error("Expecting (") 5779 parse_result = parse_method() 5780 if wrapped: 5781 self._match_r_paren() 5782 return parse_result 5783 5784 def _parse_expressions(self) -> t.List[exp.Expression]: 5785 return self._parse_csv(self._parse_expression) 5786 5787 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5788 return self._parse_select() or self._parse_set_operations( 5789 self._parse_expression() if alias else self._parse_conjunction() 5790 ) 5791 5792 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5793 return self._parse_query_modifiers( 5794 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5795 ) 5796 5797 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5798 this = None 5799 if self._match_texts(self.TRANSACTION_KIND): 5800 this = self._prev.text 5801 5802 self._match_texts(("TRANSACTION", "WORK")) 5803 5804 modes = [] 5805 while True: 5806 mode = [] 5807 while self._match(TokenType.VAR): 5808 mode.append(self._prev.text) 5809 5810 if mode: 5811 modes.append(" ".join(mode)) 5812 if not self._match(TokenType.COMMA): 5813 break 5814 5815 return self.expression(exp.Transaction, this=this, modes=modes) 5816 5817 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5818 chain = None 5819 savepoint = None 5820 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5821 5822 self._match_texts(("TRANSACTION", "WORK")) 5823 5824 if self._match_text_seq("TO"): 5825 self._match_text_seq("SAVEPOINT") 5826 savepoint = self._parse_id_var() 5827 5828 if self._match(TokenType.AND): 5829 chain = not self._match_text_seq("NO") 5830 self._match_text_seq("CHAIN") 5831 5832 if is_rollback: 5833 return self.expression(exp.Rollback, savepoint=savepoint) 5834 5835 return self.expression(exp.Commit, chain=chain) 5836 5837 def _parse_refresh(self) -> exp.Refresh: 5838 self._match(TokenType.TABLE) 5839 return self.expression(exp.Refresh, 
this=self._parse_string() or self._parse_table()) 5840 5841 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5842 if not self._match_text_seq("ADD"): 5843 return None 5844 5845 self._match(TokenType.COLUMN) 5846 exists_column = self._parse_exists(not_=True) 5847 expression = self._parse_field_def() 5848 5849 if expression: 5850 expression.set("exists", exists_column) 5851 5852 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5853 if self._match_texts(("FIRST", "AFTER")): 5854 position = self._prev.text 5855 column_position = self.expression( 5856 exp.ColumnPosition, this=self._parse_column(), position=position 5857 ) 5858 expression.set("position", column_position) 5859 5860 return expression 5861 5862 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5863 drop = self._match(TokenType.DROP) and self._parse_drop() 5864 if drop and not isinstance(drop, exp.Command): 5865 drop.set("kind", drop.args.get("kind", "COLUMN")) 5866 return drop 5867 5868 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5869 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5870 return self.expression( 5871 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5872 ) 5873 5874 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5875 index = self._index - 1 5876 5877 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 5878 return self._parse_csv( 5879 lambda: self.expression( 5880 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 5881 ) 5882 ) 5883 5884 self._retreat(index) 5885 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5886 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5887 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5888 5889 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 5890 if self._match_texts(self.ALTER_ALTER_PARSERS): 5891 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 5892 5893 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 5894 # keyword after ALTER we default to parsing this statement 5895 self._match(TokenType.COLUMN) 5896 column = self._parse_field(any_token=True) 5897 5898 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5899 return self.expression(exp.AlterColumn, this=column, drop=True) 5900 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5901 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 5902 if self._match(TokenType.COMMENT): 5903 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 5904 5905 self._match_text_seq("SET", "DATA") 5906 self._match_text_seq("TYPE") 5907 return self.expression( 5908 exp.AlterColumn, 5909 this=column, 5910 dtype=self._parse_types(), 5911 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5912 using=self._match(TokenType.USING) and self._parse_conjunction(), 5913 ) 5914 5915 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 5916 if self._match_texts(("ALL", "EVEN", "AUTO")): 5917 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 5918 5919 self._match_text_seq("KEY", "DISTKEY") 5920 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 5921 5922 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 5923 if compound: 5924 
self._match_text_seq("SORTKEY") 5925 5926 if self._match(TokenType.L_PAREN, advance=False): 5927 return self.expression( 5928 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 5929 ) 5930 5931 self._match_texts(("AUTO", "NONE")) 5932 return self.expression( 5933 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 5934 ) 5935 5936 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5937 index = self._index - 1 5938 5939 partition_exists = self._parse_exists() 5940 if self._match(TokenType.PARTITION, advance=False): 5941 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 5942 5943 self._retreat(index) 5944 return self._parse_csv(self._parse_drop_column) 5945 5946 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 5947 if self._match(TokenType.COLUMN): 5948 exists = self._parse_exists() 5949 old_column = self._parse_column() 5950 to = self._match_text_seq("TO") 5951 new_column = self._parse_column() 5952 5953 if old_column is None or to is None or new_column is None: 5954 return None 5955 5956 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 5957 5958 self._match_text_seq("TO") 5959 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5960 5961 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5962 start = self._prev 5963 5964 if not self._match(TokenType.TABLE): 5965 return self._parse_as_command(start) 5966 5967 exists = self._parse_exists() 5968 only = self._match_text_seq("ONLY") 5969 this = self._parse_table(schema=True) 5970 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 5971 5972 if self._next: 5973 self._advance() 5974 5975 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5976 if parser: 5977 actions = ensure_list(parser(self)) 5978 options = self._parse_csv(self._parse_property) 5979 5980 if not self._curr and actions: 5981 return self.expression( 5982 exp.AlterTable, 5983 this=this, 5984 exists=exists, 5985 actions=actions, 5986 only=only, 5987 options=options, 5988 cluster=cluster, 5989 ) 5990 5991 return self._parse_as_command(start) 5992 5993 def _parse_merge(self) -> exp.Merge: 5994 self._match(TokenType.INTO) 5995 target = self._parse_table() 5996 5997 if target and self._match(TokenType.ALIAS, advance=False): 5998 target.set("alias", self._parse_table_alias()) 5999 6000 self._match(TokenType.USING) 6001 using = self._parse_table() 6002 6003 self._match(TokenType.ON) 6004 on = self._parse_conjunction() 6005 6006 return self.expression( 6007 exp.Merge, 6008 this=target, 6009 using=using, 6010 on=on, 6011 expressions=self._parse_when_matched(), 6012 ) 6013 6014 def _parse_when_matched(self) -> t.List[exp.When]: 6015 whens = [] 6016 6017 while self._match(TokenType.WHEN): 6018 matched = not self._match(TokenType.NOT) 6019 self._match_text_seq("MATCHED") 6020 source = ( 6021 False 6022 if self._match_text_seq("BY", "TARGET") 6023 else self._match_text_seq("BY", "SOURCE") 6024 ) 6025 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 6026 6027 self._match(TokenType.THEN) 6028 6029 if self._match(TokenType.INSERT): 6030 _this = self._parse_star() 6031 if _this: 6032 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 6033 else: 6034 then = self.expression( 6035 exp.Insert, 6036 this=self._parse_value(), 6037 expression=self._match_text_seq("VALUES") and self._parse_value(), 6038 ) 
6039 elif self._match(TokenType.UPDATE): 6040 expressions = self._parse_star() 6041 if expressions: 6042 then = self.expression(exp.Update, expressions=expressions) 6043 else: 6044 then = self.expression( 6045 exp.Update, 6046 expressions=self._match(TokenType.SET) 6047 and self._parse_csv(self._parse_equality), 6048 ) 6049 elif self._match(TokenType.DELETE): 6050 then = self.expression(exp.Var, this=self._prev.text) 6051 else: 6052 then = None 6053 6054 whens.append( 6055 self.expression( 6056 exp.When, 6057 matched=matched, 6058 source=source, 6059 condition=condition, 6060 then=then, 6061 ) 6062 ) 6063 return whens 6064 6065 def _parse_show(self) -> t.Optional[exp.Expression]: 6066 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6067 if parser: 6068 return parser(self) 6069 return self._parse_as_command(self._prev) 6070 6071 def _parse_set_item_assignment( 6072 self, kind: t.Optional[str] = None 6073 ) -> t.Optional[exp.Expression]: 6074 index = self._index 6075 6076 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6077 return self._parse_set_transaction(global_=kind == "GLOBAL") 6078 6079 left = self._parse_primary() or self._parse_column() 6080 assignment_delimiter = self._match_texts(("=", "TO")) 6081 6082 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6083 self._retreat(index) 6084 return None 6085 6086 right = self._parse_statement() or self._parse_id_var() 6087 this = self.expression(exp.EQ, this=left, expression=right) 6088 6089 return self.expression(exp.SetItem, this=this, kind=kind) 6090 6091 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6092 self._match_text_seq("TRANSACTION") 6093 characteristics = self._parse_csv( 6094 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6095 ) 6096 return self.expression( 6097 exp.SetItem, 6098 expressions=characteristics, 6099 kind="TRANSACTION", 6100 **{"global": global_}, # type: ignore 6101 ) 6102 6103 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6104 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6105 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6106 6107 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6108 index = self._index 6109 set_ = self.expression( 6110 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6111 ) 6112 6113 if self._curr: 6114 self._retreat(index) 6115 return self._parse_as_command(self._prev) 6116 6117 return set_ 6118 6119 def _parse_var_from_options( 6120 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6121 ) -> t.Optional[exp.Var]: 6122 start = self._curr 6123 if not start: 6124 return None 6125 6126 option = start.text.upper() 6127 continuations = options.get(option) 6128 6129 index = self._index 6130 self._advance() 6131 for keywords in continuations or []: 6132 if isinstance(keywords, str): 6133 keywords = (keywords,) 6134 6135 if self._match_text_seq(*keywords): 6136 option = f"{option} {' '.join(keywords)}" 6137 break 6138 else: 6139 if continuations or continuations is None: 6140 if raise_unmatched: 6141 self.raise_error(f"Unknown option {option}") 6142 6143 self._retreat(index) 6144 return None 6145 6146 return exp.var(option) 6147 6148 def _parse_as_command(self, start: Token) -> exp.Command: 6149 while self._curr: 6150 self._advance() 6151 text = self._find_sql(start, self._prev) 6152 size = len(start.text) 6153 self._warn_unsupported() 6154 
return exp.Command(this=text[:size], expression=text[size:]) 6155 6156 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6157 settings = [] 6158 6159 self._match_l_paren() 6160 kind = self._parse_id_var() 6161 6162 if self._match(TokenType.L_PAREN): 6163 while True: 6164 key = self._parse_id_var() 6165 value = self._parse_primary() 6166 6167 if not key and value is None: 6168 break 6169 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6170 self._match(TokenType.R_PAREN) 6171 6172 self._match_r_paren() 6173 6174 return self.expression( 6175 exp.DictProperty, 6176 this=this, 6177 kind=kind.this if kind else None, 6178 settings=settings, 6179 ) 6180 6181 def _parse_dict_range(self, this: str) -> exp.DictRange: 6182 self._match_l_paren() 6183 has_min = self._match_text_seq("MIN") 6184 if has_min: 6185 min = self._parse_var() or self._parse_primary() 6186 self._match_text_seq("MAX") 6187 max = self._parse_var() or self._parse_primary() 6188 else: 6189 max = self._parse_var() or self._parse_primary() 6190 min = exp.Literal.number(0) 6191 self._match_r_paren() 6192 return self.expression(exp.DictRange, this=this, min=min, max=max) 6193 6194 def _parse_comprehension( 6195 self, this: t.Optional[exp.Expression] 6196 ) -> t.Optional[exp.Comprehension]: 6197 index = self._index 6198 expression = self._parse_column() 6199 if not self._match(TokenType.IN): 6200 self._retreat(index - 1) 6201 return None 6202 iterator = self._parse_column() 6203 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 6204 return self.expression( 6205 exp.Comprehension, 6206 this=this, 6207 expression=expression, 6208 iterator=iterator, 6209 condition=condition, 6210 ) 6211 6212 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6213 if self._match(TokenType.HEREDOC_STRING): 6214 return self.expression(exp.Heredoc, this=self._prev.text) 6215 6216 if not self._match_text_seq("$"): 6217 return None 6218 6219 tags = ["$"] 6220 tag_text = None 6221 6222 if self._is_connected(): 6223 self._advance() 6224 tags.append(self._prev.text.upper()) 6225 else: 6226 self.raise_error("No closing $ found") 6227 6228 if tags[-1] != "$": 6229 if self._is_connected() and self._match_text_seq("$"): 6230 tag_text = tags[-1] 6231 tags.append("$") 6232 else: 6233 self.raise_error("No closing $ found") 6234 6235 heredoc_start = self._curr 6236 6237 while self._curr: 6238 if self._match_text_seq(*tags, advance=False): 6239 this = self._find_sql(heredoc_start, self._prev) 6240 self._advance(len(tags)) 6241 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6242 6243 self._advance() 6244 6245 self.raise_error(f"No closing {''.join(tags)} found") 6246 return None 6247 6248 def _find_parser( 6249 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6250 ) -> t.Optional[t.Callable]: 6251 if not self._curr: 6252 return None 6253 6254 index = self._index 6255 this = [] 6256 while True: 6257 # The current token might be multiple words 6258 curr = self._curr.text.upper() 6259 key = curr.split(" ") 6260 this.append(curr) 6261 6262 self._advance() 6263 result, trie = in_trie(trie, key) 6264 if result == TrieResult.FAILED: 6265 break 6266 6267 if result == TrieResult.EXISTS: 6268 subparser = parsers[" ".join(this)] 6269 return subparser 6270 6271 self._retreat(index) 6272 return None 6273 6274 def _match(self, token_type, advance=True, expression=None): 6275 if not self._curr: 6276 return None 6277 6278 if self._curr.token_type == token_type: 6279 if advance: 6280 self._advance() 6281 
self._add_comments(expression) 6282 return True 6283 6284 return None 6285 6286 def _match_set(self, types, advance=True): 6287 if not self._curr: 6288 return None 6289 6290 if self._curr.token_type in types: 6291 if advance: 6292 self._advance() 6293 return True 6294 6295 return None 6296 6297 def _match_pair(self, token_type_a, token_type_b, advance=True): 6298 if not self._curr or not self._next: 6299 return None 6300 6301 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6302 if advance: 6303 self._advance(2) 6304 return True 6305 6306 return None 6307 6308 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6309 if not self._match(TokenType.L_PAREN, expression=expression): 6310 self.raise_error("Expecting (") 6311 6312 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6313 if not self._match(TokenType.R_PAREN, expression=expression): 6314 self.raise_error("Expecting )") 6315 6316 def _match_texts(self, texts, advance=True): 6317 if self._curr and self._curr.text.upper() in texts: 6318 if advance: 6319 self._advance() 6320 return True 6321 return None 6322 6323 def _match_text_seq(self, *texts, advance=True): 6324 index = self._index 6325 for text in texts: 6326 if self._curr and self._curr.text.upper() == text: 6327 self._advance() 6328 else: 6329 self._retreat(index) 6330 return None 6331 6332 if not advance: 6333 self._retreat(index) 6334 6335 return True 6336 6337 def _replace_lambda( 6338 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 6339 ) -> t.Optional[exp.Expression]: 6340 if not node: 6341 return node 6342 6343 for column in node.find_all(exp.Column): 6344 if column.parts[0].name in lambda_variables: 6345 dot_or_id = column.to_dot() if column.table else column.this 6346 parent = column.parent 6347 6348 while isinstance(parent, exp.Dot): 6349 if not isinstance(parent.parent, exp.Dot): 6350 parent.replace(dot_or_id) 6351 break 6352 parent = parent.parent 6353 else: 6354 if column is node: 6355 node = dot_or_id 6356 else: 6357 column.replace(dot_or_id) 6358 return node 6359 6360 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6361 start = self._prev 6362 6363 # Not to be confused with TRUNCATE(number, decimals) function call 6364 if self._match(TokenType.L_PAREN): 6365 self._retreat(self._index - 2) 6366 return self._parse_function() 6367 6368 # Clickhouse supports TRUNCATE DATABASE as well 6369 is_database = self._match(TokenType.DATABASE) 6370 6371 self._match(TokenType.TABLE) 6372 6373 exists = self._parse_exists(not_=False) 6374 6375 expressions = self._parse_csv( 6376 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6377 ) 6378 6379 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6380 6381 if self._match_text_seq("RESTART", "IDENTITY"): 6382 identity = "RESTART" 6383 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6384 identity = "CONTINUE" 6385 else: 6386 identity = None 6387 6388 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6389 option = self._prev.text 6390 else: 6391 option = None 6392 6393 partition = self._parse_partition() 6394 6395 # Fallback case 6396 if self._curr: 6397 return self._parse_as_command(start) 6398 6399 return self.expression( 6400 exp.TruncateTable, 6401 expressions=expressions, 6402 is_database=is_database, 6403 exists=exists, 6404 cluster=cluster, 6405 identity=identity, 6406 option=option, 6407 partition=partition, 6408 ) 
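    # A minimal usage sketch for _parse_truncate_table (assuming the top-level sqlglot API):
    #
    #     >>> import sqlglot
    #     >>> tt = sqlglot.parse_one("TRUNCATE TABLE t1, t2 RESTART IDENTITY CASCADE")
    #     >>> tt.args["identity"], tt.args["option"]
    #     ('RESTART', 'CASCADE')
    #
    # If any tokens remain unconsumed, the method falls back to exp.Command instead.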
6409 6410 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6411 this = self._parse_ordered(self._parse_opclass) 6412 6413 if not self._match(TokenType.WITH): 6414 return this 6415 6416 op = self._parse_var(any_token=True) 6417 6418 return self.expression(exp.WithOperator, this=this, op=op) 6419 6420 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 6421 opts = [] 6422 self._match(TokenType.EQ) 6423 self._match(TokenType.L_PAREN) 6424 while self._curr and not self._match(TokenType.R_PAREN): 6425 opts.append(self._parse_conjunction()) 6426 self._match(TokenType.COMMA) 6427 return opts 6428 6429 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 6430 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 6431 6432 options = [] 6433 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 6434 option = self._parse_unquoted_field() 6435 value = None 6436 6437 # Some options are defined as functions with their values as parameters 6438 if not isinstance(option, exp.Func): 6439 prev = self._prev.text.upper() 6440 # Different dialects may separate options and values with whitespace, "=", or "AS" 6441 self._match(TokenType.EQ) 6442 self._match(TokenType.ALIAS) 6443 6444 if prev == "FILE_FORMAT" and self._match(TokenType.L_PAREN): 6445 # Snowflake FILE_FORMAT case 6446 value = self._parse_wrapped_options() 6447 else: 6448 value = self._parse_unquoted_field() 6449 6450 param = self.expression(exp.CopyParameter, this=option, expression=value) 6451 options.append(param) 6452 6453 if sep: 6454 self._match(sep) 6455 6456 return options 6457 6458 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 6459 expr = self.expression(exp.Credentials) 6460 6461 if self._match_text_seq("STORAGE_INTEGRATION", advance=False): 6462 expr.set("storage", self._parse_conjunction()) 6463 if self._match_text_seq("CREDENTIALS"): 6464 # Snowflake supports CREDENTIALS = (...), while Redshift uses CREDENTIALS <string> 6465 creds = ( 6466 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 6467 ) 6468 expr.set("credentials", creds) 6469 if self._match_text_seq("ENCRYPTION"): 6470 expr.set("encryption", self._parse_wrapped_options()) 6471 if self._match_text_seq("IAM_ROLE"): 6472 expr.set("iam_role", self._parse_field()) 6473 if self._match_text_seq("REGION"): 6474 expr.set("region", self._parse_field()) 6475 6476 return expr 6477 6478 def _parse_file_location(self) -> t.Optional[exp.Expression]: 6479 return self._parse_field() 6480 6481 def _parse_copy(self) -> exp.Copy | exp.Command: 6482 start = self._prev 6483 6484 self._match(TokenType.INTO) 6485 6486 this = ( 6487 self._parse_conjunction() 6488 if self._match(TokenType.L_PAREN, advance=False) 6489 else self._parse_table(schema=True) 6490 ) 6491 6492 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 6493 6494 files = self._parse_csv(self._parse_file_location) 6495 credentials = self._parse_credentials() 6496 6497 self._match_text_seq("WITH") 6498 6499 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 6500 6501 # Fallback case 6502 if self._curr: 6503 return self._parse_as_command(start) 6504 6505 return self.expression( 6506 exp.Copy, 6507 this=this, 6508 kind=kind, 6509 credentials=credentials, 6510 files=files, 6511 params=params, 6512 )
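# Illustrative sketch for _parse_copy above (assumed SQL; dialect support varies):
# parsing COPY INTO t FROM 'file.csv' WITH (FORMAT CSV) would yield an exp.Copy whose
# args include this=<table t>, kind=True (FROM rather than TO), files=[<'file.csv'>]
# and params built by _parse_copy_parameters; leftover tokens trigger the exp.Command
# fallback via _parse_as_command.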
116class Parser(metaclass=_Parser): 117 """ 118 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 119 120 Args: 121 error_level: The desired error level. 122 Default: ErrorLevel.IMMEDIATE 123 error_message_context: The amount of context to capture from a query string when displaying 124 the error message (in number of characters). 125 Default: 100 126 max_errors: Maximum number of error messages to include in a raised ParseError. 127 This is only relevant if error_level is ErrorLevel.RAISE. 128 Default: 3 129 """ 130 131 FUNCTIONS: t.Dict[str, t.Callable] = { 132 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 133 "CONCAT": lambda args, dialect: exp.Concat( 134 expressions=args, 135 safe=not dialect.STRICT_STRING_CONCAT, 136 coalesce=dialect.CONCAT_COALESCE, 137 ), 138 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 139 expressions=args, 140 safe=not dialect.STRICT_STRING_CONCAT, 141 coalesce=dialect.CONCAT_COALESCE, 142 ), 143 "DATE_TO_DATE_STR": lambda args: exp.Cast( 144 this=seq_get(args, 0), 145 to=exp.DataType(this=exp.DataType.Type.TEXT), 146 ), 147 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 148 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 149 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 150 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 151 "LIKE": build_like, 152 "LOG": build_logarithm, 153 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 154 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 155 "MOD": build_mod, 156 "TIME_TO_TIME_STR": lambda args: exp.Cast( 157 this=seq_get(args, 0), 158 to=exp.DataType(this=exp.DataType.Type.TEXT), 159 ), 160 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 161 this=exp.Cast( 162 this=seq_get(args, 0), 163 to=exp.DataType(this=exp.DataType.Type.TEXT), 164 ), 165 start=exp.Literal.number(1), 166 length=exp.Literal.number(10), 167 ), 168 "VAR_MAP": build_var_map, 169 "LOWER": build_lower, 170 "UPPER": build_upper, 171 "HEX": build_hex, 172 "TO_HEX": build_hex, 173 } 174 175 NO_PAREN_FUNCTIONS = { 176 TokenType.CURRENT_DATE: exp.CurrentDate, 177 TokenType.CURRENT_DATETIME: exp.CurrentDate, 178 TokenType.CURRENT_TIME: exp.CurrentTime, 179 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 180 TokenType.CURRENT_USER: exp.CurrentUser, 181 } 182 183 STRUCT_TYPE_TOKENS = { 184 TokenType.NESTED, 185 TokenType.OBJECT, 186 TokenType.STRUCT, 187 } 188 189 NESTED_TYPE_TOKENS = { 190 TokenType.ARRAY, 191 TokenType.LOWCARDINALITY, 192 TokenType.MAP, 193 TokenType.NULLABLE, 194 *STRUCT_TYPE_TOKENS, 195 } 196 197 ENUM_TYPE_TOKENS = { 198 TokenType.ENUM, 199 TokenType.ENUM8, 200 TokenType.ENUM16, 201 } 202 203 AGGREGATE_TYPE_TOKENS = { 204 TokenType.AGGREGATEFUNCTION, 205 TokenType.SIMPLEAGGREGATEFUNCTION, 206 } 207 208 TYPE_TOKENS = { 209 TokenType.BIT, 210 TokenType.BOOLEAN, 211 TokenType.TINYINT, 212 TokenType.UTINYINT, 213 TokenType.SMALLINT, 214 TokenType.USMALLINT, 215 TokenType.INT, 216 TokenType.UINT, 217 TokenType.BIGINT, 218 TokenType.UBIGINT, 219 TokenType.INT128, 220 TokenType.UINT128, 221 TokenType.INT256, 222 TokenType.UINT256, 223 TokenType.MEDIUMINT, 224 TokenType.UMEDIUMINT, 225 TokenType.FIXEDSTRING, 226 TokenType.FLOAT, 227 TokenType.DOUBLE, 228 TokenType.CHAR, 229 TokenType.NCHAR, 230 TokenType.VARCHAR, 231 TokenType.NVARCHAR, 232 TokenType.BPCHAR, 233 TokenType.TEXT, 234 
TokenType.MEDIUMTEXT, 235 TokenType.LONGTEXT, 236 TokenType.MEDIUMBLOB, 237 TokenType.LONGBLOB, 238 TokenType.BINARY, 239 TokenType.VARBINARY, 240 TokenType.JSON, 241 TokenType.JSONB, 242 TokenType.INTERVAL, 243 TokenType.TINYBLOB, 244 TokenType.TINYTEXT, 245 TokenType.TIME, 246 TokenType.TIMETZ, 247 TokenType.TIMESTAMP, 248 TokenType.TIMESTAMP_S, 249 TokenType.TIMESTAMP_MS, 250 TokenType.TIMESTAMP_NS, 251 TokenType.TIMESTAMPTZ, 252 TokenType.TIMESTAMPLTZ, 253 TokenType.TIMESTAMPNTZ, 254 TokenType.DATETIME, 255 TokenType.DATETIME64, 256 TokenType.DATE, 257 TokenType.DATE32, 258 TokenType.INT4RANGE, 259 TokenType.INT4MULTIRANGE, 260 TokenType.INT8RANGE, 261 TokenType.INT8MULTIRANGE, 262 TokenType.NUMRANGE, 263 TokenType.NUMMULTIRANGE, 264 TokenType.TSRANGE, 265 TokenType.TSMULTIRANGE, 266 TokenType.TSTZRANGE, 267 TokenType.TSTZMULTIRANGE, 268 TokenType.DATERANGE, 269 TokenType.DATEMULTIRANGE, 270 TokenType.DECIMAL, 271 TokenType.UDECIMAL, 272 TokenType.BIGDECIMAL, 273 TokenType.UUID, 274 TokenType.GEOGRAPHY, 275 TokenType.GEOMETRY, 276 TokenType.HLLSKETCH, 277 TokenType.HSTORE, 278 TokenType.PSEUDO_TYPE, 279 TokenType.SUPER, 280 TokenType.SERIAL, 281 TokenType.SMALLSERIAL, 282 TokenType.BIGSERIAL, 283 TokenType.XML, 284 TokenType.YEAR, 285 TokenType.UNIQUEIDENTIFIER, 286 TokenType.USERDEFINED, 287 TokenType.MONEY, 288 TokenType.SMALLMONEY, 289 TokenType.ROWVERSION, 290 TokenType.IMAGE, 291 TokenType.VARIANT, 292 TokenType.OBJECT, 293 TokenType.OBJECT_IDENTIFIER, 294 TokenType.INET, 295 TokenType.IPADDRESS, 296 TokenType.IPPREFIX, 297 TokenType.IPV4, 298 TokenType.IPV6, 299 TokenType.UNKNOWN, 300 TokenType.NULL, 301 TokenType.NAME, 302 TokenType.TDIGEST, 303 *ENUM_TYPE_TOKENS, 304 *NESTED_TYPE_TOKENS, 305 *AGGREGATE_TYPE_TOKENS, 306 } 307 308 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 309 TokenType.BIGINT: TokenType.UBIGINT, 310 TokenType.INT: TokenType.UINT, 311 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 312 TokenType.SMALLINT: TokenType.USMALLINT, 313 TokenType.TINYINT: TokenType.UTINYINT, 314 TokenType.DECIMAL: TokenType.UDECIMAL, 315 } 316 317 SUBQUERY_PREDICATES = { 318 TokenType.ANY: exp.Any, 319 TokenType.ALL: exp.All, 320 TokenType.EXISTS: exp.Exists, 321 TokenType.SOME: exp.Any, 322 } 323 324 RESERVED_TOKENS = { 325 *Tokenizer.SINGLE_TOKENS.values(), 326 TokenType.SELECT, 327 } - {TokenType.IDENTIFIER} 328 329 DB_CREATABLES = { 330 TokenType.DATABASE, 331 TokenType.DICTIONARY, 332 TokenType.MODEL, 333 TokenType.SCHEMA, 334 TokenType.SEQUENCE, 335 TokenType.STORAGE_INTEGRATION, 336 TokenType.TABLE, 337 TokenType.TAG, 338 TokenType.VIEW, 339 } 340 341 CREATABLES = { 342 TokenType.COLUMN, 343 TokenType.CONSTRAINT, 344 TokenType.FOREIGN_KEY, 345 TokenType.FUNCTION, 346 TokenType.INDEX, 347 TokenType.PROCEDURE, 348 *DB_CREATABLES, 349 } 350 351 # Tokens that can represent identifiers 352 ID_VAR_TOKENS = { 353 TokenType.VAR, 354 TokenType.ANTI, 355 TokenType.APPLY, 356 TokenType.ASC, 357 TokenType.ASOF, 358 TokenType.AUTO_INCREMENT, 359 TokenType.BEGIN, 360 TokenType.BPCHAR, 361 TokenType.CACHE, 362 TokenType.CASE, 363 TokenType.COLLATE, 364 TokenType.COMMAND, 365 TokenType.COMMENT, 366 TokenType.COMMIT, 367 TokenType.CONSTRAINT, 368 TokenType.COPY, 369 TokenType.DEFAULT, 370 TokenType.DELETE, 371 TokenType.DESC, 372 TokenType.DESCRIBE, 373 TokenType.DICTIONARY, 374 TokenType.DIV, 375 TokenType.END, 376 TokenType.EXECUTE, 377 TokenType.ESCAPE, 378 TokenType.FALSE, 379 TokenType.FIRST, 380 TokenType.FILTER, 381 TokenType.FINAL, 382 TokenType.FORMAT, 383 TokenType.FULL, 384 TokenType.IDENTIFIER, 385 
TokenType.IS, 386 TokenType.ISNULL, 387 TokenType.INTERVAL, 388 TokenType.KEEP, 389 TokenType.KILL, 390 TokenType.LEFT, 391 TokenType.LOAD, 392 TokenType.MERGE, 393 TokenType.NATURAL, 394 TokenType.NEXT, 395 TokenType.OFFSET, 396 TokenType.OPERATOR, 397 TokenType.ORDINALITY, 398 TokenType.OVERLAPS, 399 TokenType.OVERWRITE, 400 TokenType.PARTITION, 401 TokenType.PERCENT, 402 TokenType.PIVOT, 403 TokenType.PRAGMA, 404 TokenType.RANGE, 405 TokenType.RECURSIVE, 406 TokenType.REFERENCES, 407 TokenType.REFRESH, 408 TokenType.REPLACE, 409 TokenType.RIGHT, 410 TokenType.ROLLUP, 411 TokenType.ROW, 412 TokenType.ROWS, 413 TokenType.SEMI, 414 TokenType.SET, 415 TokenType.SETTINGS, 416 TokenType.SHOW, 417 TokenType.TEMPORARY, 418 TokenType.TOP, 419 TokenType.TRUE, 420 TokenType.TRUNCATE, 421 TokenType.UNIQUE, 422 TokenType.UNPIVOT, 423 TokenType.UPDATE, 424 TokenType.USE, 425 TokenType.VOLATILE, 426 TokenType.WINDOW, 427 *CREATABLES, 428 *SUBQUERY_PREDICATES, 429 *TYPE_TOKENS, 430 *NO_PAREN_FUNCTIONS, 431 } 432 433 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 434 435 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 436 TokenType.ANTI, 437 TokenType.APPLY, 438 TokenType.ASOF, 439 TokenType.FULL, 440 TokenType.LEFT, 441 TokenType.LOCK, 442 TokenType.NATURAL, 443 TokenType.OFFSET, 444 TokenType.RIGHT, 445 TokenType.SEMI, 446 TokenType.WINDOW, 447 } 448 449 ALIAS_TOKENS = ID_VAR_TOKENS 450 451 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 452 453 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 454 455 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 456 457 FUNC_TOKENS = { 458 TokenType.COLLATE, 459 TokenType.COMMAND, 460 TokenType.CURRENT_DATE, 461 TokenType.CURRENT_DATETIME, 462 TokenType.CURRENT_TIMESTAMP, 463 TokenType.CURRENT_TIME, 464 TokenType.CURRENT_USER, 465 TokenType.FILTER, 466 TokenType.FIRST, 467 TokenType.FORMAT, 468 TokenType.GLOB, 469 TokenType.IDENTIFIER, 470 TokenType.INDEX, 471 TokenType.ISNULL, 472 TokenType.ILIKE, 473 TokenType.INSERT, 474 TokenType.LIKE, 475 TokenType.MERGE, 476 TokenType.OFFSET, 477 TokenType.PRIMARY_KEY, 478 TokenType.RANGE, 479 TokenType.REPLACE, 480 TokenType.RLIKE, 481 TokenType.ROW, 482 TokenType.UNNEST, 483 TokenType.VAR, 484 TokenType.LEFT, 485 TokenType.RIGHT, 486 TokenType.SEQUENCE, 487 TokenType.DATE, 488 TokenType.DATETIME, 489 TokenType.TABLE, 490 TokenType.TIMESTAMP, 491 TokenType.TIMESTAMPTZ, 492 TokenType.TRUNCATE, 493 TokenType.WINDOW, 494 TokenType.XOR, 495 *TYPE_TOKENS, 496 *SUBQUERY_PREDICATES, 497 } 498 499 CONJUNCTION = { 500 TokenType.AND: exp.And, 501 TokenType.OR: exp.Or, 502 } 503 504 EQUALITY = { 505 TokenType.EQ: exp.EQ, 506 TokenType.NEQ: exp.NEQ, 507 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 508 } 509 510 COMPARISON = { 511 TokenType.GT: exp.GT, 512 TokenType.GTE: exp.GTE, 513 TokenType.LT: exp.LT, 514 TokenType.LTE: exp.LTE, 515 } 516 517 BITWISE = { 518 TokenType.AMP: exp.BitwiseAnd, 519 TokenType.CARET: exp.BitwiseXor, 520 TokenType.PIPE: exp.BitwiseOr, 521 } 522 523 TERM = { 524 TokenType.DASH: exp.Sub, 525 TokenType.PLUS: exp.Add, 526 TokenType.MOD: exp.Mod, 527 TokenType.COLLATE: exp.Collate, 528 } 529 530 FACTOR = { 531 TokenType.DIV: exp.IntDiv, 532 TokenType.LR_ARROW: exp.Distance, 533 TokenType.SLASH: exp.Div, 534 TokenType.STAR: exp.Mul, 535 } 536 537 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 538 539 TIMES = { 540 TokenType.TIME, 541 TokenType.TIMETZ, 542 } 543 544 TIMESTAMPS = { 545 TokenType.TIMESTAMP, 546 TokenType.TIMESTAMPTZ, 547 TokenType.TIMESTAMPLTZ, 548 *TIMES, 549 } 550 551 
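    # Note (illustrative): the CONJUNCTION, EQUALITY, COMPARISON, BITWISE, TERM and
    # FACTOR maps above drive _parse_tokens' left-associative loop, so "a - b + c"
    # parses as exp.Add(this=exp.Sub(this=a, expression=b), expression=c), while the
    # tighter FACTOR level makes "a + b * c" parse as a + (b * c).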
SET_OPERATIONS = { 552 TokenType.UNION, 553 TokenType.INTERSECT, 554 TokenType.EXCEPT, 555 } 556 557 JOIN_METHODS = { 558 TokenType.ASOF, 559 TokenType.NATURAL, 560 TokenType.POSITIONAL, 561 } 562 563 JOIN_SIDES = { 564 TokenType.LEFT, 565 TokenType.RIGHT, 566 TokenType.FULL, 567 } 568 569 JOIN_KINDS = { 570 TokenType.INNER, 571 TokenType.OUTER, 572 TokenType.CROSS, 573 TokenType.SEMI, 574 TokenType.ANTI, 575 } 576 577 JOIN_HINTS: t.Set[str] = set() 578 579 LAMBDAS = { 580 TokenType.ARROW: lambda self, expressions: self.expression( 581 exp.Lambda, 582 this=self._replace_lambda( 583 self._parse_conjunction(), 584 {node.name for node in expressions}, 585 ), 586 expressions=expressions, 587 ), 588 TokenType.FARROW: lambda self, expressions: self.expression( 589 exp.Kwarg, 590 this=exp.var(expressions[0].name), 591 expression=self._parse_conjunction(), 592 ), 593 } 594 595 COLUMN_OPERATORS = { 596 TokenType.DOT: None, 597 TokenType.DCOLON: lambda self, this, to: self.expression( 598 exp.Cast if self.STRICT_CAST else exp.TryCast, 599 this=this, 600 to=to, 601 ), 602 TokenType.ARROW: lambda self, this, path: self.expression( 603 exp.JSONExtract, 604 this=this, 605 expression=self.dialect.to_json_path(path), 606 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 607 ), 608 TokenType.DARROW: lambda self, this, path: self.expression( 609 exp.JSONExtractScalar, 610 this=this, 611 expression=self.dialect.to_json_path(path), 612 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 613 ), 614 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 615 exp.JSONBExtract, 616 this=this, 617 expression=path, 618 ), 619 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 620 exp.JSONBExtractScalar, 621 this=this, 622 expression=path, 623 ), 624 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 625 exp.JSONBContains, 626 this=this, 627 expression=key, 628 ), 629 } 630 631 EXPRESSION_PARSERS = { 632 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 633 exp.Column: lambda self: self._parse_column(), 634 exp.Condition: lambda self: self._parse_conjunction(), 635 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 636 exp.Expression: lambda self: self._parse_expression(), 637 exp.From: lambda self: self._parse_from(joins=True), 638 exp.Group: lambda self: self._parse_group(), 639 exp.Having: lambda self: self._parse_having(), 640 exp.Identifier: lambda self: self._parse_id_var(), 641 exp.Join: lambda self: self._parse_join(), 642 exp.Lambda: lambda self: self._parse_lambda(), 643 exp.Lateral: lambda self: self._parse_lateral(), 644 exp.Limit: lambda self: self._parse_limit(), 645 exp.Offset: lambda self: self._parse_offset(), 646 exp.Order: lambda self: self._parse_order(), 647 exp.Ordered: lambda self: self._parse_ordered(), 648 exp.Properties: lambda self: self._parse_properties(), 649 exp.Qualify: lambda self: self._parse_qualify(), 650 exp.Returning: lambda self: self._parse_returning(), 651 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 652 exp.Table: lambda self: self._parse_table_parts(), 653 exp.TableAlias: lambda self: self._parse_table_alias(), 654 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 655 exp.Where: lambda self: self._parse_where(), 656 exp.Window: lambda self: self._parse_named_window(), 657 exp.With: lambda self: self._parse_with(), 658 "JOIN_TYPE": lambda self: self._parse_join_parts(), 659 } 660 661 STATEMENT_PARSERS = { 662 TokenType.ALTER: lambda self: 

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon),
    }
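
    # _parse_statement (defined further below) dispatches on the first token of
    # each statement through STATEMENT_PARSERS. Sketch, assuming the default
    # dialect (not part of this module):
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     create, select = sqlglot.parse("CREATE TABLE t (x INT); SELECT * FROM t")
    #     assert isinstance(create, exp.Create) and isinstance(select, exp.Select)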

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        ),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
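
    # RANGE_PARSERS handle postfix predicates such as BETWEEN, IN, IS and LIKE,
    # attaching them to the expression parsed so far. Sketch (not part of this
    # module):
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     pred = sqlglot.parse_one("x BETWEEN 1 AND 10")
    #     assert isinstance(pred, exp.Between)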

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
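
    # PROPERTY_PARSERS is keyed by the keyword that introduces a DDL property.
    # Sketch (not part of this module):
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     ddl = sqlglot.parse_one("CREATE TABLE t (x INT) ENGINE=InnoDB", read="mysql")
    #     assert ddl.find(exp.EngineProperty) is not None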

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_conjunction),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }
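
    # CONSTRAINT_PARSERS handle the constraint keywords that may follow a column
    # definition. Sketch (not part of this module):
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     ddl = sqlglot.parse_one("CREATE TABLE t (x INT NOT NULL DEFAULT 0)")
    #     assert ddl.find(exp.NotNullColumnConstraint) is not None
    #     assert ddl.find(exp.DefaultColumnConstraint) is not None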

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }
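
    # FUNCTION_PARSERS cover functions whose argument lists need bespoke syntax
    # handling (CAST's "AS", EXTRACT's "FROM", and so on). Sketch (not part of
    # this module):
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     cast = sqlglot.parse_one("SELECT TRY_CAST(x AS INT) FROM t").find(exp.TryCast)
    #     assert cast is not None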

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTER: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple())

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS
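
    # Each QUERY_MODIFIER_PARSERS entry above returns an (arg_name, node) pair
    # that is attached to the enclosing query's args. Sketch (not part of this
    # module):
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     q = sqlglot.parse_one("SELECT * FROM t WHERE x = 1 LIMIT 5")
    #     assert isinstance(q.args["where"], exp.Where)
    #     assert isinstance(q.args["limit"], exp.Limit)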

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. SELECT COUNT(*) 'count'
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_UNION = True
    UNION_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTH
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None
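
    # The __init__/reset pair above makes a Parser instance reusable across
    # calls; parse (below) consumes raw tokens, mirroring what sqlglot.parse does
    # under the hood. Sketch (not part of this module):
    #
    #     from sqlglot.parser import Parser
    #     from sqlglot.tokens import Tokenizer
    #
    #     parser = Parser()
    #     trees = parser.parse(Tokenizer().tokenize("SELECT 1"))
    #     assert len(trees) == 1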
1231 """ 1232 return self._parse( 1233 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1234 ) 1235 1236 def parse_into( 1237 self, 1238 expression_types: exp.IntoType, 1239 raw_tokens: t.List[Token], 1240 sql: t.Optional[str] = None, 1241 ) -> t.List[t.Optional[exp.Expression]]: 1242 """ 1243 Parses a list of tokens into a given Expression type. If a collection of Expression 1244 types is given instead, this method will try to parse the token list into each one 1245 of them, stopping at the first for which the parsing succeeds. 1246 1247 Args: 1248 expression_types: The expression type(s) to try and parse the token list into. 1249 raw_tokens: The list of tokens. 1250 sql: The original SQL string, used to produce helpful debug messages. 1251 1252 Returns: 1253 The target Expression. 1254 """ 1255 errors = [] 1256 for expression_type in ensure_list(expression_types): 1257 parser = self.EXPRESSION_PARSERS.get(expression_type) 1258 if not parser: 1259 raise TypeError(f"No parser registered for {expression_type}") 1260 1261 try: 1262 return self._parse(parser, raw_tokens, sql) 1263 except ParseError as e: 1264 e.errors[0]["into_expression"] = expression_type 1265 errors.append(e) 1266 1267 raise ParseError( 1268 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1269 errors=merge_errors(errors), 1270 ) from errors[-1] 1271 1272 def _parse( 1273 self, 1274 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1275 raw_tokens: t.List[Token], 1276 sql: t.Optional[str] = None, 1277 ) -> t.List[t.Optional[exp.Expression]]: 1278 self.reset() 1279 self.sql = sql or "" 1280 1281 total = len(raw_tokens) 1282 chunks: t.List[t.List[Token]] = [[]] 1283 1284 for i, token in enumerate(raw_tokens): 1285 if token.token_type == TokenType.SEMICOLON: 1286 if token.comments: 1287 chunks.append([token]) 1288 1289 if i < total - 1: 1290 chunks.append([]) 1291 else: 1292 chunks[-1].append(token) 1293 1294 expressions = [] 1295 1296 for tokens in chunks: 1297 self._index = -1 1298 self._tokens = tokens 1299 self._advance() 1300 1301 expressions.append(parse_method(self)) 1302 1303 if self._index < len(self._tokens): 1304 self.raise_error("Invalid expression / Unexpected token") 1305 1306 self.check_errors() 1307 1308 return expressions 1309 1310 def check_errors(self) -> None: 1311 """Logs or raises any found errors, depending on the chosen error level setting.""" 1312 if self.error_level == ErrorLevel.WARN: 1313 for error in self.errors: 1314 logger.error(str(error)) 1315 elif self.error_level == ErrorLevel.RAISE and self.errors: 1316 raise ParseError( 1317 concat_messages(self.errors, self.max_errors), 1318 errors=merge_errors(self.errors), 1319 ) 1320 1321 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1322 """ 1323 Appends an error in the list of recorded errors or raises it, depending on the chosen 1324 error level setting. 1325 """ 1326 token = token or self._curr or self._prev or Token.string("") 1327 start = token.start 1328 end = token.end + 1 1329 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1330 highlight = self.sql[start:end] 1331 end_context = self.sql[end : end + self.error_message_context] 1332 1333 error = ParseError.new( 1334 f"{message}. 

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error to the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command, this=self._prev.text.upper(), expression=self._parse_string()
        )
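
    # expression() above funnels every node through validate_expression, so a
    # missing mandatory argument is reported through raise_error. Sketch
    # (assumes exp.Not requires its `this` argument; not part of this module):
    #
    #     from sqlglot import exp
    #     from sqlglot.parser import Parser
    #
    #     Parser().expression(exp.Not)  # raises ParseError: required `this` is missing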

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an
        error. This behavior can be different depending on the user-set ErrorLevel, so _try_parse
        aims to solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
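
    # _parse_comment above covers Postgres-style COMMENT ON statements. Sketch
    # (not part of this module):
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     stmt = sqlglot.parse_one("COMMENT ON TABLE db.t IS 'fact table'")
    #     assert isinstance(stmt, exp.Comment) and stmt.args["kind"] == "TABLE"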

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=kind.upper(),
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
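
    # Sketch of _parse_drop's output (not part of this module):
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     drop = sqlglot.parse_one("DROP TABLE IF EXISTS s.t")
    #     assert isinstance(drop, exp.Drop)
    #     assert drop.args["exists"] is True and drop.args["kind"] == "TABLE"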

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )

        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_statement()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self.expression(
                exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
            )

        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            return self._parse_as_command(start)

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text.upper(),
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )
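
    # Sketch of _parse_create's output (not part of this module):
    #
    #     import sqlglot
    #
    #     create = sqlglot.parse_one("CREATE OR REPLACE VIEW v AS SELECT 1")
    #     assert create.args["replace"] is True and create.args["kind"] == "VIEW"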
self._match_text_seq("CACHE"): 1755 # T-SQL allows empty CACHE which is initialized dynamically 1756 seq.set("cache", self._parse_number() or True) 1757 elif self._match_text_seq("OWNED", "BY"): 1758 # "OWNED BY NONE" is the default 1759 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1760 else: 1761 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1762 if opt: 1763 options.append(opt) 1764 else: 1765 break 1766 1767 seq.set("options", options if options else None) 1768 return None if self._index == index else seq 1769 1770 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1771 # only used for teradata currently 1772 self._match(TokenType.COMMA) 1773 1774 kwargs = { 1775 "no": self._match_text_seq("NO"), 1776 "dual": self._match_text_seq("DUAL"), 1777 "before": self._match_text_seq("BEFORE"), 1778 "default": self._match_text_seq("DEFAULT"), 1779 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1780 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1781 "after": self._match_text_seq("AFTER"), 1782 "minimum": self._match_texts(("MIN", "MINIMUM")), 1783 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1784 } 1785 1786 if self._match_texts(self.PROPERTY_PARSERS): 1787 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1788 try: 1789 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1790 except TypeError: 1791 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1792 1793 return None 1794 1795 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1796 return self._parse_wrapped_csv(self._parse_property) 1797 1798 def _parse_property(self) -> t.Optional[exp.Expression]: 1799 if self._match_texts(self.PROPERTY_PARSERS): 1800 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1801 1802 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1803 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1804 1805 if self._match_text_seq("COMPOUND", "SORTKEY"): 1806 return self._parse_sortkey(compound=True) 1807 1808 if self._match_text_seq("SQL", "SECURITY"): 1809 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1810 1811 index = self._index 1812 key = self._parse_column() 1813 1814 if not self._match(TokenType.EQ): 1815 self._retreat(index) 1816 return self._parse_sequence_properties() 1817 1818 return self.expression( 1819 exp.Property, 1820 this=key.to_dot() if isinstance(key, exp.Column) else key, 1821 value=self._parse_bitwise() or self._parse_var(any_token=True), 1822 ) 1823 1824 def _parse_stored(self) -> exp.FileFormatProperty: 1825 self._match(TokenType.ALIAS) 1826 1827 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1828 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1829 1830 return self.expression( 1831 exp.FileFormatProperty, 1832 this=( 1833 self.expression( 1834 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1835 ) 1836 if input_format or output_format 1837 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1838 ), 1839 ) 1840 1841 def _parse_unquoted_field(self): 1842 field = self._parse_field() 1843 if isinstance(field, exp.Identifier) and not field.quoted: 1844 field = exp.var(field) 1845 1846 return field 1847 1848 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1849 self._match(TokenType.EQ) 1850 

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)

        return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()
            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty:
        self._match_pair(TokenType.EQ, TokenType.ON)

        prop = self.expression(exp.WithSystemVersioningProperty)
        if self._match(TokenType.L_PAREN):
            self._match_text_seq("HISTORY_TABLE", "=")
            prop.set("this", self._parse_table_parts())

            if self._match(TokenType.COMMA):
                self._match_text_seq("DATA_CONSISTENCY_CHECK", "=")
                prop.set("expression", self._advance_any() and self._prev.text.upper())

            self._match_r_paren()

        return prop

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )
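
    # Sketch of _parse_partitioned_by's output for Hive-style DDL (not part of
    # this module):
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     ddl = sqlglot.parse_one(
    #         "CREATE TABLE t (x INT) PARTITIONED BY (ds STRING)", read="hive"
    #     )
    #     assert ddl.find(exp.PartitionedByProperty) is not None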
self._match(TokenType.GT): 2276 self.raise_error("Expecting >") 2277 else: 2278 value = self._parse_schema(exp.var("TABLE")) 2279 else: 2280 value = self._parse_types() 2281 2282 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 2283 2284 def _parse_describe(self) -> exp.Describe: 2285 kind = self._match_set(self.CREATABLES) and self._prev.text 2286 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2287 if self._match(TokenType.DOT): 2288 style = None 2289 self._retreat(self._index - 2) 2290 this = self._parse_table(schema=True) 2291 properties = self._parse_properties() 2292 expressions = properties.expressions if properties else None 2293 return self.expression( 2294 exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2295 ) 2296 2297 def _parse_insert(self) -> exp.Insert: 2298 comments = ensure_list(self._prev_comments) 2299 hint = self._parse_hint() 2300 overwrite = self._match(TokenType.OVERWRITE) 2301 ignore = self._match(TokenType.IGNORE) 2302 local = self._match_text_seq("LOCAL") 2303 alternative = None 2304 is_function = None 2305 2306 if self._match_text_seq("DIRECTORY"): 2307 this: t.Optional[exp.Expression] = self.expression( 2308 exp.Directory, 2309 this=self._parse_var_or_string(), 2310 local=local, 2311 row_format=self._parse_row_format(match_row=True), 2312 ) 2313 else: 2314 if self._match(TokenType.OR): 2315 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2316 2317 self._match(TokenType.INTO) 2318 comments += ensure_list(self._prev_comments) 2319 self._match(TokenType.TABLE) 2320 is_function = self._match(TokenType.FUNCTION) 2321 2322 this = ( 2323 self._parse_table(schema=True, parse_partition=True) 2324 if not is_function 2325 else self._parse_function() 2326 ) 2327 2328 returning = self._parse_returning() 2329 2330 return self.expression( 2331 exp.Insert, 2332 comments=comments, 2333 hint=hint, 2334 is_function=is_function, 2335 this=this, 2336 stored=self._match_text_seq("STORED") and self._parse_stored(), 2337 by_name=self._match_text_seq("BY", "NAME"), 2338 exists=self._parse_exists(), 2339 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 2340 and self._parse_conjunction(), 2341 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2342 conflict=self._parse_on_conflict(), 2343 returning=returning or self._parse_returning(), 2344 overwrite=overwrite, 2345 alternative=alternative, 2346 ignore=ignore, 2347 ) 2348 2349 def _parse_kill(self) -> exp.Kill: 2350 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2351 2352 return self.expression( 2353 exp.Kill, 2354 this=self._parse_primary(), 2355 kind=kind, 2356 ) 2357 2358 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2359 conflict = self._match_text_seq("ON", "CONFLICT") 2360 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2361 2362 if not conflict and not duplicate: 2363 return None 2364 2365 conflict_keys = None 2366 constraint = None 2367 2368 if conflict: 2369 if self._match_text_seq("ON", "CONSTRAINT"): 2370 constraint = self._parse_id_var() 2371 elif self._match(TokenType.L_PAREN): 2372 conflict_keys = self._parse_csv(self._parse_id_var) 2373 self._match_r_paren() 2374 2375 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2376 if self._prev.token_type == TokenType.UPDATE: 2377 self._match(TokenType.SET) 2378 expressions = self._parse_csv(self._parse_equality) 2379 else: 2380 expressions = None 
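        # e.g. Postgres' `ON CONFLICT (id) DO UPDATE SET x = 1` reaches this return
        # with conflict_keys=[id] and expressions=[x = 1], while MySQL's
        # `ON DUPLICATE KEY UPDATE x = 1` sets duplicate=True instead.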
2381 2382 return self.expression( 2383 exp.OnConflict, 2384 duplicate=duplicate, 2385 expressions=expressions, 2386 action=action, 2387 conflict_keys=conflict_keys, 2388 constraint=constraint, 2389 ) 2390 2391 def _parse_returning(self) -> t.Optional[exp.Returning]: 2392 if not self._match(TokenType.RETURNING): 2393 return None 2394 return self.expression( 2395 exp.Returning, 2396 expressions=self._parse_csv(self._parse_expression), 2397 into=self._match(TokenType.INTO) and self._parse_table_part(), 2398 ) 2399 2400 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2401 if not self._match(TokenType.FORMAT): 2402 return None 2403 return self._parse_row_format() 2404 2405 def _parse_row_format( 2406 self, match_row: bool = False 2407 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2408 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2409 return None 2410 2411 if self._match_text_seq("SERDE"): 2412 this = self._parse_string() 2413 2414 serde_properties = None 2415 if self._match(TokenType.SERDE_PROPERTIES): 2416 serde_properties = self.expression( 2417 exp.SerdeProperties, expressions=self._parse_wrapped_properties() 2418 ) 2419 2420 return self.expression( 2421 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2422 ) 2423 2424 self._match_text_seq("DELIMITED") 2425 2426 kwargs = {} 2427 2428 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2429 kwargs["fields"] = self._parse_string() 2430 if self._match_text_seq("ESCAPED", "BY"): 2431 kwargs["escaped"] = self._parse_string() 2432 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2433 kwargs["collection_items"] = self._parse_string() 2434 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2435 kwargs["map_keys"] = self._parse_string() 2436 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2437 kwargs["lines"] = self._parse_string() 2438 if self._match_text_seq("NULL", "DEFINED", "AS"): 2439 kwargs["null"] = self._parse_string() 2440 2441 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2442 2443 def _parse_load(self) -> exp.LoadData | exp.Command: 2444 if self._match_text_seq("DATA"): 2445 local = self._match_text_seq("LOCAL") 2446 self._match_text_seq("INPATH") 2447 inpath = self._parse_string() 2448 overwrite = self._match(TokenType.OVERWRITE) 2449 self._match_pair(TokenType.INTO, TokenType.TABLE) 2450 2451 return self.expression( 2452 exp.LoadData, 2453 this=self._parse_table(schema=True), 2454 local=local, 2455 overwrite=overwrite, 2456 inpath=inpath, 2457 partition=self._parse_partition(), 2458 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2459 serde=self._match_text_seq("SERDE") and self._parse_string(), 2460 ) 2461 return self._parse_as_command(self._prev) 2462 2463 def _parse_delete(self) -> exp.Delete: 2464 # This handles MySQL's "Multiple-Table Syntax" 2465 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2466 tables = None 2467 comments = self._prev_comments 2468 if not self._match(TokenType.FROM, advance=False): 2469 tables = self._parse_csv(self._parse_table) or None 2470 2471 returning = self._parse_returning() 2472 2473 return self.expression( 2474 exp.Delete, 2475 comments=comments, 2476 tables=tables, 2477 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2478 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2479 where=self._parse_where(), 2480 returning=returning 
or self._parse_returning(), 2481 limit=self._parse_limit(), 2482 ) 2483 2484 def _parse_update(self) -> exp.Update: 2485 comments = self._prev_comments 2486 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2487 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2488 returning = self._parse_returning() 2489 return self.expression( 2490 exp.Update, 2491 comments=comments, 2492 **{ # type: ignore 2493 "this": this, 2494 "expressions": expressions, 2495 "from": self._parse_from(joins=True), 2496 "where": self._parse_where(), 2497 "returning": returning or self._parse_returning(), 2498 "order": self._parse_order(), 2499 "limit": self._parse_limit(), 2500 }, 2501 ) 2502 2503 def _parse_uncache(self) -> exp.Uncache: 2504 if not self._match(TokenType.TABLE): 2505 self.raise_error("Expecting TABLE after UNCACHE") 2506 2507 return self.expression( 2508 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2509 ) 2510 2511 def _parse_cache(self) -> exp.Cache: 2512 lazy = self._match_text_seq("LAZY") 2513 self._match(TokenType.TABLE) 2514 table = self._parse_table(schema=True) 2515 2516 options = [] 2517 if self._match_text_seq("OPTIONS"): 2518 self._match_l_paren() 2519 k = self._parse_string() 2520 self._match(TokenType.EQ) 2521 v = self._parse_string() 2522 options = [k, v] 2523 self._match_r_paren() 2524 2525 self._match(TokenType.ALIAS) 2526 return self.expression( 2527 exp.Cache, 2528 this=table, 2529 lazy=lazy, 2530 options=options, 2531 expression=self._parse_select(nested=True), 2532 ) 2533 2534 def _parse_partition(self) -> t.Optional[exp.Partition]: 2535 if not self._match(TokenType.PARTITION): 2536 return None 2537 2538 return self.expression( 2539 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2540 ) 2541 2542 def _parse_value(self) -> t.Optional[exp.Tuple]: 2543 if self._match(TokenType.L_PAREN): 2544 expressions = self._parse_csv(self._parse_expression) 2545 self._match_r_paren() 2546 return self.expression(exp.Tuple, expressions=expressions) 2547 2548 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
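        # e.g. the caller's _parse_csv invokes this method once per comma-separated
        # value, so `VALUES 1, 2` ends up as roughly
        #   Values(expressions=[Tuple(expressions=[Literal(1)]), Tuple(expressions=[Literal(2)])])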
2549 expression = self._parse_expression() 2550 if expression: 2551 return self.expression(exp.Tuple, expressions=[expression]) 2552 return None 2553 2554 def _parse_projections(self) -> t.List[exp.Expression]: 2555 return self._parse_expressions() 2556 2557 def _parse_select( 2558 self, 2559 nested: bool = False, 2560 table: bool = False, 2561 parse_subquery_alias: bool = True, 2562 parse_set_operation: bool = True, 2563 ) -> t.Optional[exp.Expression]: 2564 cte = self._parse_with() 2565 2566 if cte: 2567 this = self._parse_statement() 2568 2569 if not this: 2570 self.raise_error("Failed to parse any statement following CTE") 2571 return cte 2572 2573 if "with" in this.arg_types: 2574 this.set("with", cte) 2575 else: 2576 self.raise_error(f"{this.key} does not support CTE") 2577 this = cte 2578 2579 return this 2580 2581 # duckdb supports leading with FROM x 2582 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2583 2584 if self._match(TokenType.SELECT): 2585 comments = self._prev_comments 2586 2587 hint = self._parse_hint() 2588 all_ = self._match(TokenType.ALL) 2589 distinct = self._match_set(self.DISTINCT_TOKENS) 2590 2591 kind = ( 2592 self._match(TokenType.ALIAS) 2593 and self._match_texts(("STRUCT", "VALUE")) 2594 and self._prev.text.upper() 2595 ) 2596 2597 if distinct: 2598 distinct = self.expression( 2599 exp.Distinct, 2600 on=self._parse_value() if self._match(TokenType.ON) else None, 2601 ) 2602 2603 if all_ and distinct: 2604 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2605 2606 limit = self._parse_limit(top=True) 2607 projections = self._parse_projections() 2608 2609 this = self.expression( 2610 exp.Select, 2611 kind=kind, 2612 hint=hint, 2613 distinct=distinct, 2614 expressions=projections, 2615 limit=limit, 2616 ) 2617 this.comments = comments 2618 2619 into = self._parse_into() 2620 if into: 2621 this.set("into", into) 2622 2623 if not from_: 2624 from_ = self._parse_from() 2625 2626 if from_: 2627 this.set("from", from_) 2628 2629 this = self._parse_query_modifiers(this) 2630 elif (table or nested) and self._match(TokenType.L_PAREN): 2631 if self._match(TokenType.PIVOT): 2632 this = self._parse_simplified_pivot() 2633 elif self._match(TokenType.FROM): 2634 this = exp.select("*").from_( 2635 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2636 ) 2637 else: 2638 this = ( 2639 self._parse_table() 2640 if table 2641 else self._parse_select(nested=True, parse_set_operation=False) 2642 ) 2643 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2644 2645 self._match_r_paren() 2646 2647 # We return early here so that the UNION isn't attached to the subquery by the 2648 # following call to _parse_set_operations, but instead becomes the parent node 2649 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2650 elif self._match(TokenType.VALUES, advance=False): 2651 this = self._parse_derived_table_values() 2652 elif from_: 2653 this = exp.select("*").from_(from_.this, copy=False) 2654 else: 2655 this = None 2656 2657 if parse_set_operation: 2658 return self._parse_set_operations(this) 2659 return this 2660 2661 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2662 if not skip_with_token and not self._match(TokenType.WITH): 2663 return None 2664 2665 comments = self._prev_comments 2666 recursive = self._match(TokenType.RECURSIVE) 2667 2668 expressions = [] 2669 while True: 2670 expressions.append(self._parse_cte()) 2671 2672 if not 
self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2673 break 2674 else: 2675 self._match(TokenType.WITH) 2676 2677 return self.expression( 2678 exp.With, comments=comments, expressions=expressions, recursive=recursive 2679 ) 2680 2681 def _parse_cte(self) -> exp.CTE: 2682 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2683 if not alias or not alias.this: 2684 self.raise_error("Expected CTE to have alias") 2685 2686 self._match(TokenType.ALIAS) 2687 2688 if self._match_text_seq("NOT", "MATERIALIZED"): 2689 materialized = False 2690 elif self._match_text_seq("MATERIALIZED"): 2691 materialized = True 2692 else: 2693 materialized = None 2694 2695 return self.expression( 2696 exp.CTE, 2697 this=self._parse_wrapped(self._parse_statement), 2698 alias=alias, 2699 materialized=materialized, 2700 ) 2701 2702 def _parse_table_alias( 2703 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2704 ) -> t.Optional[exp.TableAlias]: 2705 any_token = self._match(TokenType.ALIAS) 2706 alias = ( 2707 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2708 or self._parse_string_as_identifier() 2709 ) 2710 2711 index = self._index 2712 if self._match(TokenType.L_PAREN): 2713 columns = self._parse_csv(self._parse_function_parameter) 2714 self._match_r_paren() if columns else self._retreat(index) 2715 else: 2716 columns = None 2717 2718 if not alias and not columns: 2719 return None 2720 2721 return self.expression(exp.TableAlias, this=alias, columns=columns) 2722 2723 def _parse_subquery( 2724 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2725 ) -> t.Optional[exp.Subquery]: 2726 if not this: 2727 return None 2728 2729 return self.expression( 2730 exp.Subquery, 2731 this=this, 2732 pivots=self._parse_pivots(), 2733 alias=self._parse_table_alias() if parse_alias else None, 2734 ) 2735 2736 def _implicit_unnests_to_explicit(self, this: E) -> E: 2737 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 2738 2739 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2740 for i, join in enumerate(this.args.get("joins") or []): 2741 table = join.this 2742 normalized_table = table.copy() 2743 normalized_table.meta["maybe_column"] = True 2744 normalized_table = _norm(normalized_table, dialect=self.dialect) 2745 2746 if isinstance(table, exp.Table) and not join.args.get("on"): 2747 if normalized_table.parts[0].name in refs: 2748 table_as_column = table.to_column() 2749 unnest = exp.Unnest(expressions=[table_as_column]) 2750 2751 # Table.to_column creates a parent Alias node that we want to convert to 2752 # a TableAlias and attach to the Unnest, so it matches the parser's output 2753 if isinstance(table.args.get("alias"), exp.TableAlias): 2754 table_as_column.replace(table_as_column.this) 2755 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2756 2757 table.replace(unnest) 2758 2759 refs.add(normalized_table.alias_or_name) 2760 2761 return this 2762 2763 def _parse_query_modifiers( 2764 self, this: t.Optional[exp.Expression] 2765 ) -> t.Optional[exp.Expression]: 2766 if isinstance(this, (exp.Query, exp.Table)): 2767 for join in self._parse_joins(): 2768 this.append("joins", join) 2769 for lateral in iter(self._parse_lateral, None): 2770 this.append("laterals", lateral) 2771 2772 while True: 2773 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2774 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2775 key, expression = parser(self) 
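                # For `limit`, the block below splits a combined `LIMIT <offset>, <count>`
                # into separate Limit and Offset nodes; when an offset was parsed, any
                # ClickHouse-style `LIMIT ... BY <cols>` expressions are moved onto the
                # new Offset node, roughly:
                #   LIMIT 1, 2 BY x -> Limit(expression=2), Offset(expression=1, expressions=[x])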
2776 2777 if expression: 2778 this.set(key, expression) 2779 if key == "limit": 2780 offset = expression.args.pop("offset", None) 2781 2782 if offset: 2783 offset = exp.Offset(expression=offset) 2784 this.set("offset", offset) 2785 2786 limit_by_expressions = expression.expressions 2787 expression.set("expressions", None) 2788 offset.set("expressions", limit_by_expressions) 2789 continue 2790 break 2791 2792 if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args: 2793 this = self._implicit_unnests_to_explicit(this) 2794 2795 return this 2796 2797 def _parse_hint(self) -> t.Optional[exp.Hint]: 2798 if self._match(TokenType.HINT): 2799 hints = [] 2800 for hint in iter( 2801 lambda: self._parse_csv( 2802 lambda: self._parse_function() or self._parse_var(upper=True) 2803 ), 2804 [], 2805 ): 2806 hints.extend(hint) 2807 2808 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2809 self.raise_error("Expected */ after HINT") 2810 2811 return self.expression(exp.Hint, expressions=hints) 2812 2813 return None 2814 2815 def _parse_into(self) -> t.Optional[exp.Into]: 2816 if not self._match(TokenType.INTO): 2817 return None 2818 2819 temp = self._match(TokenType.TEMPORARY) 2820 unlogged = self._match_text_seq("UNLOGGED") 2821 self._match(TokenType.TABLE) 2822 2823 return self.expression( 2824 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2825 ) 2826 2827 def _parse_from( 2828 self, joins: bool = False, skip_from_token: bool = False 2829 ) -> t.Optional[exp.From]: 2830 if not skip_from_token and not self._match(TokenType.FROM): 2831 return None 2832 2833 return self.expression( 2834 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2835 ) 2836 2837 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 2838 return self.expression( 2839 exp.MatchRecognizeMeasure, 2840 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 2841 this=self._parse_expression(), 2842 ) 2843 2844 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2845 if not self._match(TokenType.MATCH_RECOGNIZE): 2846 return None 2847 2848 self._match_l_paren() 2849 2850 partition = self._parse_partition_by() 2851 order = self._parse_order() 2852 2853 measures = ( 2854 self._parse_csv(self._parse_match_recognize_measure) 2855 if self._match_text_seq("MEASURES") 2856 else None 2857 ) 2858 2859 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2860 rows = exp.var("ONE ROW PER MATCH") 2861 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2862 text = "ALL ROWS PER MATCH" 2863 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2864 text += " SHOW EMPTY MATCHES" 2865 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2866 text += " OMIT EMPTY MATCHES" 2867 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2868 text += " WITH UNMATCHED ROWS" 2869 rows = exp.var(text) 2870 else: 2871 rows = None 2872 2873 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2874 text = "AFTER MATCH SKIP" 2875 if self._match_text_seq("PAST", "LAST", "ROW"): 2876 text += " PAST LAST ROW" 2877 elif self._match_text_seq("TO", "NEXT", "ROW"): 2878 text += " TO NEXT ROW" 2879 elif self._match_text_seq("TO", "FIRST"): 2880 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2881 elif self._match_text_seq("TO", "LAST"): 2882 text += f" TO LAST {self._advance_any().text}" # type: ignore 2883 after = exp.var(text) 2884 else: 2885 after = None 2886 2887 if self._match_text_seq("PATTERN"): 2888 
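            # The pattern body is not parsed into an AST: the loop below scans to the
            # balancing closing paren and stores the raw SQL between the parens, so
            # e.g. PATTERN (A B+ C?) is kept verbatim as the var "A B+ C?".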
self._match_l_paren() 2889 2890 if not self._curr: 2891 self.raise_error("Expecting )", self._curr) 2892 2893 paren = 1 2894 start = self._curr 2895 2896 while self._curr and paren > 0: 2897 if self._curr.token_type == TokenType.L_PAREN: 2898 paren += 1 2899 if self._curr.token_type == TokenType.R_PAREN: 2900 paren -= 1 2901 2902 end = self._prev 2903 self._advance() 2904 2905 if paren > 0: 2906 self.raise_error("Expecting )", self._curr) 2907 2908 pattern = exp.var(self._find_sql(start, end)) 2909 else: 2910 pattern = None 2911 2912 define = ( 2913 self._parse_csv(self._parse_name_as_expression) 2914 if self._match_text_seq("DEFINE") 2915 else None 2916 ) 2917 2918 self._match_r_paren() 2919 2920 return self.expression( 2921 exp.MatchRecognize, 2922 partition_by=partition, 2923 order=order, 2924 measures=measures, 2925 rows=rows, 2926 after=after, 2927 pattern=pattern, 2928 define=define, 2929 alias=self._parse_table_alias(), 2930 ) 2931 2932 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2933 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2934 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 2935 cross_apply = False 2936 2937 if cross_apply is not None: 2938 this = self._parse_select(table=True) 2939 view = None 2940 outer = None 2941 elif self._match(TokenType.LATERAL): 2942 this = self._parse_select(table=True) 2943 view = self._match(TokenType.VIEW) 2944 outer = self._match(TokenType.OUTER) 2945 else: 2946 return None 2947 2948 if not this: 2949 this = ( 2950 self._parse_unnest() 2951 or self._parse_function() 2952 or self._parse_id_var(any_token=False) 2953 ) 2954 2955 while self._match(TokenType.DOT): 2956 this = exp.Dot( 2957 this=this, 2958 expression=self._parse_function() or self._parse_id_var(any_token=False), 2959 ) 2960 2961 if view: 2962 table = self._parse_id_var(any_token=False) 2963 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2964 table_alias: t.Optional[exp.TableAlias] = self.expression( 2965 exp.TableAlias, this=table, columns=columns 2966 ) 2967 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 2968 # We move the alias from the lateral's child node to the lateral itself 2969 table_alias = this.args["alias"].pop() 2970 else: 2971 table_alias = self._parse_table_alias() 2972 2973 return self.expression( 2974 exp.Lateral, 2975 this=this, 2976 view=view, 2977 outer=outer, 2978 alias=table_alias, 2979 cross_apply=cross_apply, 2980 ) 2981 2982 def _parse_join_parts( 2983 self, 2984 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2985 return ( 2986 self._match_set(self.JOIN_METHODS) and self._prev, 2987 self._match_set(self.JOIN_SIDES) and self._prev, 2988 self._match_set(self.JOIN_KINDS) and self._prev, 2989 ) 2990 2991 def _parse_join( 2992 self, skip_join_token: bool = False, parse_bracket: bool = False 2993 ) -> t.Optional[exp.Join]: 2994 if self._match(TokenType.COMMA): 2995 return self.expression(exp.Join, this=self._parse_table()) 2996 2997 index = self._index 2998 method, side, kind = self._parse_join_parts() 2999 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3000 join = self._match(TokenType.JOIN) 3001 3002 if not skip_join_token and not join: 3003 self._retreat(index) 3004 kind = None 3005 method = None 3006 side = None 3007 3008 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3009 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3010 3011 if not skip_join_token and 
not join and not outer_apply and not cross_apply: 3012 return None 3013 3014 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3015 3016 if method: 3017 kwargs["method"] = method.text 3018 if side: 3019 kwargs["side"] = side.text 3020 if kind: 3021 kwargs["kind"] = kind.text 3022 if hint: 3023 kwargs["hint"] = hint 3024 3025 if self._match(TokenType.MATCH_CONDITION): 3026 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3027 3028 if self._match(TokenType.ON): 3029 kwargs["on"] = self._parse_conjunction() 3030 elif self._match(TokenType.USING): 3031 kwargs["using"] = self._parse_wrapped_id_vars() 3032 elif not isinstance(kwargs["this"], exp.Unnest) and not ( 3033 kind and kind.token_type == TokenType.CROSS 3034 ): 3035 index = self._index 3036 joins: t.Optional[list] = list(self._parse_joins()) 3037 3038 if joins and self._match(TokenType.ON): 3039 kwargs["on"] = self._parse_conjunction() 3040 elif joins and self._match(TokenType.USING): 3041 kwargs["using"] = self._parse_wrapped_id_vars() 3042 else: 3043 joins = None 3044 self._retreat(index) 3045 3046 kwargs["this"].set("joins", joins if joins else None) 3047 3048 comments = [c for token in (method, side, kind) if token for c in token.comments] 3049 return self.expression(exp.Join, comments=comments, **kwargs) 3050 3051 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3052 this = self._parse_conjunction() 3053 3054 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3055 return this 3056 3057 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3058 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3059 3060 return this 3061 3062 def _parse_index_params(self) -> exp.IndexParameters: 3063 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3064 3065 if self._match(TokenType.L_PAREN, advance=False): 3066 columns = self._parse_wrapped_csv(self._parse_with_operator) 3067 else: 3068 columns = None 3069 3070 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3071 partition_by = self._parse_partition_by() 3072 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3073 tablespace = ( 3074 self._parse_var(any_token=True) 3075 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3076 else None 3077 ) 3078 where = self._parse_where() 3079 3080 return self.expression( 3081 exp.IndexParameters, 3082 using=using, 3083 columns=columns, 3084 include=include, 3085 partition_by=partition_by, 3086 where=where, 3087 with_storage=with_storage, 3088 tablespace=tablespace, 3089 ) 3090 3091 def _parse_index( 3092 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3093 ) -> t.Optional[exp.Index]: 3094 if index or anonymous: 3095 unique = None 3096 primary = None 3097 amp = None 3098 3099 self._match(TokenType.ON) 3100 self._match(TokenType.TABLE) # hive 3101 table = self._parse_table_parts(schema=True) 3102 else: 3103 unique = self._match(TokenType.UNIQUE) 3104 primary = self._match_text_seq("PRIMARY") 3105 amp = self._match_text_seq("AMP") 3106 3107 if not self._match(TokenType.INDEX): 3108 return None 3109 3110 index = self._parse_id_var() 3111 table = None 3112 3113 params = self._parse_index_params() 3114 3115 return self.expression( 3116 exp.Index, 3117 this=index, 3118 table=table, 3119 unique=unique, 3120 primary=primary, 3121 amp=amp, 3122 params=params, 3123 ) 3124 3125 def _parse_table_hints(self) -> 
t.Optional[t.List[exp.Expression]]: 3126 hints: t.List[exp.Expression] = [] 3127 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3128 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3129 hints.append( 3130 self.expression( 3131 exp.WithTableHint, 3132 expressions=self._parse_csv( 3133 lambda: self._parse_function() or self._parse_var(any_token=True) 3134 ), 3135 ) 3136 ) 3137 self._match_r_paren() 3138 else: 3139 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3140 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3141 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3142 3143 self._match_texts(("INDEX", "KEY")) 3144 if self._match(TokenType.FOR): 3145 hint.set("target", self._advance_any() and self._prev.text.upper()) 3146 3147 hint.set("expressions", self._parse_wrapped_id_vars()) 3148 hints.append(hint) 3149 3150 return hints or None 3151 3152 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3153 return ( 3154 (not schema and self._parse_function(optional_parens=False)) 3155 or self._parse_id_var(any_token=False) 3156 or self._parse_string_as_identifier() 3157 or self._parse_placeholder() 3158 ) 3159 3160 def _parse_table_parts( 3161 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3162 ) -> exp.Table: 3163 catalog = None 3164 db = None 3165 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3166 3167 while self._match(TokenType.DOT): 3168 if catalog: 3169 # This allows nesting the table in arbitrarily many dot expressions if needed 3170 table = self.expression( 3171 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3172 ) 3173 else: 3174 catalog = db 3175 db = table 3176 # "" used for tsql FROM a..b case 3177 table = self._parse_table_part(schema=schema) or "" 3178 3179 if ( 3180 wildcard 3181 and self._is_connected() 3182 and (isinstance(table, exp.Identifier) or not table) 3183 and self._match(TokenType.STAR) 3184 ): 3185 if isinstance(table, exp.Identifier): 3186 table.args["this"] += "*" 3187 else: 3188 table = exp.Identifier(this="*") 3189 3190 # We bubble up comments from the Identifier to the Table 3191 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3192 3193 if is_db_reference: 3194 catalog = db 3195 db = table 3196 table = None 3197 3198 if not table and not is_db_reference: 3199 self.raise_error(f"Expected table name but got {self._curr}") 3200 if not db and is_db_reference: 3201 self.raise_error(f"Expected database name but got {self._curr}") 3202 3203 return self.expression( 3204 exp.Table, 3205 comments=comments, 3206 this=table, 3207 db=db, 3208 catalog=catalog, 3209 pivots=self._parse_pivots(), 3210 ) 3211 3212 def _parse_table( 3213 self, 3214 schema: bool = False, 3215 joins: bool = False, 3216 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3217 parse_bracket: bool = False, 3218 is_db_reference: bool = False, 3219 parse_partition: bool = False, 3220 ) -> t.Optional[exp.Expression]: 3221 lateral = self._parse_lateral() 3222 if lateral: 3223 return lateral 3224 3225 unnest = self._parse_unnest() 3226 if unnest: 3227 return unnest 3228 3229 values = self._parse_derived_table_values() 3230 if values: 3231 return values 3232 3233 subquery = self._parse_select(table=True) 3234 if subquery: 3235 if not subquery.args.get("pivots"): 3236 subquery.set("pivots", self._parse_pivots()) 3237 return subquery 3238 3239 bracket = parse_bracket and 
self._parse_bracket(None) 3240 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3241 3242 only = self._match(TokenType.ONLY) 3243 3244 this = t.cast( 3245 exp.Expression, 3246 bracket 3247 or self._parse_bracket( 3248 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3249 ), 3250 ) 3251 3252 if only: 3253 this.set("only", only) 3254 3255 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3256 self._match_text_seq("*") 3257 3258 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3259 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3260 this.set("partition", self._parse_partition()) 3261 3262 if schema: 3263 return self._parse_schema(this=this) 3264 3265 version = self._parse_version() 3266 3267 if version: 3268 this.set("version", version) 3269 3270 if self.dialect.ALIAS_POST_TABLESAMPLE: 3271 table_sample = self._parse_table_sample() 3272 3273 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3274 if alias: 3275 this.set("alias", alias) 3276 3277 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3278 return self.expression( 3279 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3280 ) 3281 3282 this.set("hints", self._parse_table_hints()) 3283 3284 if not this.args.get("pivots"): 3285 this.set("pivots", self._parse_pivots()) 3286 3287 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3288 table_sample = self._parse_table_sample() 3289 3290 if table_sample: 3291 table_sample.set("this", this) 3292 this = table_sample 3293 3294 if joins: 3295 for join in self._parse_joins(): 3296 this.append("joins", join) 3297 3298 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3299 this.set("ordinality", True) 3300 this.set("alias", self._parse_table_alias()) 3301 3302 return this 3303 3304 def _parse_version(self) -> t.Optional[exp.Version]: 3305 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3306 this = "TIMESTAMP" 3307 elif self._match(TokenType.VERSION_SNAPSHOT): 3308 this = "VERSION" 3309 else: 3310 return None 3311 3312 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3313 kind = self._prev.text.upper() 3314 start = self._parse_bitwise() 3315 self._match_texts(("TO", "AND")) 3316 end = self._parse_bitwise() 3317 expression: t.Optional[exp.Expression] = self.expression( 3318 exp.Tuple, expressions=[start, end] 3319 ) 3320 elif self._match_text_seq("CONTAINED", "IN"): 3321 kind = "CONTAINED IN" 3322 expression = self.expression( 3323 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3324 ) 3325 elif self._match(TokenType.ALL): 3326 kind = "ALL" 3327 expression = None 3328 else: 3329 self._match_text_seq("AS", "OF") 3330 kind = "AS OF" 3331 expression = self._parse_type() 3332 3333 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3334 3335 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3336 if not self._match(TokenType.UNNEST): 3337 return None 3338 3339 expressions = self._parse_wrapped_csv(self._parse_equality) 3340 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3341 3342 alias = self._parse_table_alias() if with_alias else None 3343 3344 if alias: 3345 if self.dialect.UNNEST_COLUMN_ONLY: 3346 if alias.args.get("columns"): 3347 self.raise_error("Unexpected extra column alias in unnest.") 3348 3349 alias.set("columns", [alias.this]) 3350 alias.set("this", None) 3351 3352 columns = 
alias.args.get("columns") or [] 3353 if offset and len(expressions) < len(columns): 3354 offset = columns.pop() 3355 3356 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3357 self._match(TokenType.ALIAS) 3358 offset = self._parse_id_var( 3359 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3360 ) or exp.to_identifier("offset") 3361 3362 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3363 3364 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3365 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3366 if not is_derived and not self._match_text_seq("VALUES"): 3367 return None 3368 3369 expressions = self._parse_csv(self._parse_value) 3370 alias = self._parse_table_alias() 3371 3372 if is_derived: 3373 self._match_r_paren() 3374 3375 return self.expression( 3376 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3377 ) 3378 3379 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3380 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3381 as_modifier and self._match_text_seq("USING", "SAMPLE") 3382 ): 3383 return None 3384 3385 bucket_numerator = None 3386 bucket_denominator = None 3387 bucket_field = None 3388 percent = None 3389 size = None 3390 seed = None 3391 3392 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3393 matched_l_paren = self._match(TokenType.L_PAREN) 3394 3395 if self.TABLESAMPLE_CSV: 3396 num = None 3397 expressions = self._parse_csv(self._parse_primary) 3398 else: 3399 expressions = None 3400 num = ( 3401 self._parse_factor() 3402 if self._match(TokenType.NUMBER, advance=False) 3403 else self._parse_primary() or self._parse_placeholder() 3404 ) 3405 3406 if self._match_text_seq("BUCKET"): 3407 bucket_numerator = self._parse_number() 3408 self._match_text_seq("OUT", "OF") 3409 bucket_denominator = bucket_denominator = self._parse_number() 3410 self._match(TokenType.ON) 3411 bucket_field = self._parse_field() 3412 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3413 percent = num 3414 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3415 size = num 3416 else: 3417 percent = num 3418 3419 if matched_l_paren: 3420 self._match_r_paren() 3421 3422 if self._match(TokenType.L_PAREN): 3423 method = self._parse_var(upper=True) 3424 seed = self._match(TokenType.COMMA) and self._parse_number() 3425 self._match_r_paren() 3426 elif self._match_texts(("SEED", "REPEATABLE")): 3427 seed = self._parse_wrapped(self._parse_number) 3428 3429 if not method and self.DEFAULT_SAMPLING_METHOD: 3430 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3431 3432 return self.expression( 3433 exp.TableSample, 3434 expressions=expressions, 3435 method=method, 3436 bucket_numerator=bucket_numerator, 3437 bucket_denominator=bucket_denominator, 3438 bucket_field=bucket_field, 3439 percent=percent, 3440 size=size, 3441 seed=seed, 3442 ) 3443 3444 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3445 return list(iter(self._parse_pivot, None)) or None 3446 3447 def _parse_joins(self) -> t.Iterator[exp.Join]: 3448 return iter(self._parse_join, None) 3449 3450 # https://duckdb.org/docs/sql/statements/pivot 3451 def _parse_simplified_pivot(self) -> exp.Pivot: 3452 def _parse_on() -> t.Optional[exp.Expression]: 3453 this = self._parse_bitwise() 3454 return self._parse_in(this) if self._match(TokenType.IN) else this 3455 3456 this = self._parse_table() 3457 expressions = 
self._match(TokenType.ON) and self._parse_csv(_parse_on) 3458 using = self._match(TokenType.USING) and self._parse_csv( 3459 lambda: self._parse_alias(self._parse_function()) 3460 ) 3461 group = self._parse_group() 3462 return self.expression( 3463 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3464 ) 3465 3466 def _parse_pivot_in(self) -> exp.In: 3467 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3468 this = self._parse_conjunction() 3469 3470 self._match(TokenType.ALIAS) 3471 alias = self._parse_field() 3472 if alias: 3473 return self.expression(exp.PivotAlias, this=this, alias=alias) 3474 3475 return this 3476 3477 value = self._parse_column() 3478 3479 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3480 self.raise_error("Expecting IN (") 3481 3482 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3483 3484 self._match_r_paren() 3485 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3486 3487 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3488 index = self._index 3489 include_nulls = None 3490 3491 if self._match(TokenType.PIVOT): 3492 unpivot = False 3493 elif self._match(TokenType.UNPIVOT): 3494 unpivot = True 3495 3496 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3497 if self._match_text_seq("INCLUDE", "NULLS"): 3498 include_nulls = True 3499 elif self._match_text_seq("EXCLUDE", "NULLS"): 3500 include_nulls = False 3501 else: 3502 return None 3503 3504 expressions = [] 3505 3506 if not self._match(TokenType.L_PAREN): 3507 self._retreat(index) 3508 return None 3509 3510 if unpivot: 3511 expressions = self._parse_csv(self._parse_column) 3512 else: 3513 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3514 3515 if not expressions: 3516 self.raise_error("Failed to parse PIVOT's aggregation list") 3517 3518 if not self._match(TokenType.FOR): 3519 self.raise_error("Expecting FOR") 3520 3521 field = self._parse_pivot_in() 3522 3523 self._match_r_paren() 3524 3525 pivot = self.expression( 3526 exp.Pivot, 3527 expressions=expressions, 3528 field=field, 3529 unpivot=unpivot, 3530 include_nulls=include_nulls, 3531 ) 3532 3533 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3534 pivot.set("alias", self._parse_table_alias()) 3535 3536 if not unpivot: 3537 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3538 3539 columns: t.List[exp.Expression] = [] 3540 for fld in pivot.args["field"].expressions: 3541 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3542 for name in names: 3543 if self.PREFIXED_PIVOT_COLUMNS: 3544 name = f"{name}_{field_name}" if name else field_name 3545 else: 3546 name = f"{field_name}_{name}" if name else field_name 3547 3548 columns.append(exp.to_identifier(name)) 3549 3550 pivot.set("columns", columns) 3551 3552 return pivot 3553 3554 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3555 return [agg.alias for agg in aggregations] 3556 3557 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3558 if not skip_where_token and not self._match(TokenType.PREWHERE): 3559 return None 3560 3561 return self.expression( 3562 exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction() 3563 ) 3564 3565 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3566 if not skip_where_token and not 
self._match(TokenType.WHERE): 3567 return None 3568 3569 return self.expression( 3570 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 3571 ) 3572 3573 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3574 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3575 return None 3576 3577 elements: t.Dict[str, t.Any] = defaultdict(list) 3578 3579 if self._match(TokenType.ALL): 3580 elements["all"] = True 3581 elif self._match(TokenType.DISTINCT): 3582 elements["all"] = False 3583 3584 while True: 3585 expressions = self._parse_csv( 3586 lambda: None 3587 if self._match(TokenType.ROLLUP, advance=False) 3588 else self._parse_conjunction() 3589 ) 3590 if expressions: 3591 elements["expressions"].extend(expressions) 3592 3593 grouping_sets = self._parse_grouping_sets() 3594 if grouping_sets: 3595 elements["grouping_sets"].extend(grouping_sets) 3596 3597 rollup = None 3598 cube = None 3599 totals = None 3600 3601 index = self._index 3602 with_ = self._match(TokenType.WITH) 3603 if self._match(TokenType.ROLLUP): 3604 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3605 elements["rollup"].extend(ensure_list(rollup)) 3606 3607 if self._match(TokenType.CUBE): 3608 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3609 elements["cube"].extend(ensure_list(cube)) 3610 3611 if self._match_text_seq("TOTALS"): 3612 totals = True 3613 elements["totals"] = True # type: ignore 3614 3615 if not (grouping_sets or rollup or cube or totals): 3616 if with_: 3617 self._retreat(index) 3618 break 3619 3620 return self.expression(exp.Group, **elements) # type: ignore 3621 3622 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3623 if not self._match(TokenType.GROUPING_SETS): 3624 return None 3625 3626 return self._parse_wrapped_csv(self._parse_grouping_set) 3627 3628 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3629 if self._match(TokenType.L_PAREN): 3630 grouping_set = self._parse_csv(self._parse_column) 3631 self._match_r_paren() 3632 return self.expression(exp.Tuple, expressions=grouping_set) 3633 3634 return self._parse_column() 3635 3636 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3637 if not skip_having_token and not self._match(TokenType.HAVING): 3638 return None 3639 return self.expression(exp.Having, this=self._parse_conjunction()) 3640 3641 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3642 if not self._match(TokenType.QUALIFY): 3643 return None 3644 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3645 3646 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3647 if skip_start_token: 3648 start = None 3649 elif self._match(TokenType.START_WITH): 3650 start = self._parse_conjunction() 3651 else: 3652 return None 3653 3654 self._match(TokenType.CONNECT_BY) 3655 nocycle = self._match_text_seq("NOCYCLE") 3656 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3657 exp.Prior, this=self._parse_bitwise() 3658 ) 3659 connect = self._parse_conjunction() 3660 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3661 3662 if not start and self._match(TokenType.START_WITH): 3663 start = self._parse_conjunction() 3664 3665 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3666 3667 def _parse_name_as_expression(self) -> exp.Alias: 3668 return self.expression( 3669 exp.Alias, 3670 alias=self._parse_id_var(any_token=True), 3671 
this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 3672 ) 3673 3674 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3675 if self._match_text_seq("INTERPOLATE"): 3676 return self._parse_wrapped_csv(self._parse_name_as_expression) 3677 return None 3678 3679 def _parse_order( 3680 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3681 ) -> t.Optional[exp.Expression]: 3682 siblings = None 3683 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3684 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3685 return this 3686 3687 siblings = True 3688 3689 return self.expression( 3690 exp.Order, 3691 this=this, 3692 expressions=self._parse_csv(self._parse_ordered), 3693 interpolate=self._parse_interpolate(), 3694 siblings=siblings, 3695 ) 3696 3697 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3698 if not self._match(token): 3699 return None 3700 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3701 3702 def _parse_ordered( 3703 self, parse_method: t.Optional[t.Callable] = None 3704 ) -> t.Optional[exp.Ordered]: 3705 this = parse_method() if parse_method else self._parse_conjunction() 3706 if not this: 3707 return None 3708 3709 asc = self._match(TokenType.ASC) 3710 desc = self._match(TokenType.DESC) or (asc and False) 3711 3712 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3713 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3714 3715 nulls_first = is_nulls_first or False 3716 explicitly_null_ordered = is_nulls_first or is_nulls_last 3717 3718 if ( 3719 not explicitly_null_ordered 3720 and ( 3721 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3722 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3723 ) 3724 and self.dialect.NULL_ORDERING != "nulls_are_last" 3725 ): 3726 nulls_first = True 3727 3728 if self._match_text_seq("WITH", "FILL"): 3729 with_fill = self.expression( 3730 exp.WithFill, 3731 **{ # type: ignore 3732 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3733 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3734 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3735 }, 3736 ) 3737 else: 3738 with_fill = None 3739 3740 return self.expression( 3741 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3742 ) 3743 3744 def _parse_limit( 3745 self, 3746 this: t.Optional[exp.Expression] = None, 3747 top: bool = False, 3748 skip_limit_token: bool = False, 3749 ) -> t.Optional[exp.Expression]: 3750 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 3751 comments = self._prev_comments 3752 if top: 3753 limit_paren = self._match(TokenType.L_PAREN) 3754 expression = self._parse_term() if limit_paren else self._parse_number() 3755 3756 if limit_paren: 3757 self._match_r_paren() 3758 else: 3759 expression = self._parse_term() 3760 3761 if self._match(TokenType.COMMA): 3762 offset = expression 3763 expression = self._parse_term() 3764 else: 3765 offset = None 3766 3767 limit_exp = self.expression( 3768 exp.Limit, 3769 this=this, 3770 expression=expression, 3771 offset=offset, 3772 comments=comments, 3773 expressions=self._parse_limit_by(), 3774 ) 3775 3776 return limit_exp 3777 3778 if self._match(TokenType.FETCH): 3779 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3780 direction = self._prev.text.upper() if direction else "FIRST" 3781 3782 count = self._parse_field(tokens=self.FETCH_TOKENS) 3783 percent = 
self._match(TokenType.PERCENT) 3784 3785 self._match_set((TokenType.ROW, TokenType.ROWS)) 3786 3787 only = self._match_text_seq("ONLY") 3788 with_ties = self._match_text_seq("WITH", "TIES") 3789 3790 if only and with_ties: 3791 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3792 3793 return self.expression( 3794 exp.Fetch, 3795 direction=direction, 3796 count=count, 3797 percent=percent, 3798 with_ties=with_ties, 3799 ) 3800 3801 return this 3802 3803 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3804 if not self._match(TokenType.OFFSET): 3805 return this 3806 3807 count = self._parse_term() 3808 self._match_set((TokenType.ROW, TokenType.ROWS)) 3809 3810 return self.expression( 3811 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3812 ) 3813 3814 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3815 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3816 3817 def _parse_locks(self) -> t.List[exp.Lock]: 3818 locks = [] 3819 while True: 3820 if self._match_text_seq("FOR", "UPDATE"): 3821 update = True 3822 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3823 "LOCK", "IN", "SHARE", "MODE" 3824 ): 3825 update = False 3826 else: 3827 break 3828 3829 expressions = None 3830 if self._match_text_seq("OF"): 3831 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3832 3833 wait: t.Optional[bool | exp.Expression] = None 3834 if self._match_text_seq("NOWAIT"): 3835 wait = True 3836 elif self._match_text_seq("WAIT"): 3837 wait = self._parse_primary() 3838 elif self._match_text_seq("SKIP", "LOCKED"): 3839 wait = False 3840 3841 locks.append( 3842 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3843 ) 3844 3845 return locks 3846 3847 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3848 while this and self._match_set(self.SET_OPERATIONS): 3849 token_type = self._prev.token_type 3850 3851 if token_type == TokenType.UNION: 3852 operation = exp.Union 3853 elif token_type == TokenType.EXCEPT: 3854 operation = exp.Except 3855 else: 3856 operation = exp.Intersect 3857 3858 comments = self._prev.comments 3859 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3860 by_name = self._match_text_seq("BY", "NAME") 3861 expression = self._parse_select(nested=True, parse_set_operation=False) 3862 3863 this = self.expression( 3864 operation, 3865 comments=comments, 3866 this=this, 3867 distinct=distinct, 3868 by_name=by_name, 3869 expression=expression, 3870 ) 3871 3872 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3873 expression = this.expression 3874 3875 if expression: 3876 for arg in self.UNION_MODIFIERS: 3877 expr = expression.args.get(arg) 3878 if expr: 3879 this.set(arg, expr.pop()) 3880 3881 return this 3882 3883 def _parse_expression(self) -> t.Optional[exp.Expression]: 3884 return self._parse_alias(self._parse_conjunction()) 3885 3886 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3887 this = self._parse_equality() 3888 3889 if self._match(TokenType.COLON_EQ): 3890 this = self.expression( 3891 exp.PropertyEQ, 3892 this=this, 3893 comments=self._prev_comments, 3894 expression=self._parse_conjunction(), 3895 ) 3896 3897 while self._match_set(self.CONJUNCTION): 3898 this = self.expression( 3899 self.CONJUNCTION[self._prev.token_type], 3900 this=this, 3901 comments=self._prev_comments, 3902 
expression=self._parse_equality(), 3903 ) 3904 return this 3905 3906 def _parse_equality(self) -> t.Optional[exp.Expression]: 3907 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3908 3909 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3910 return self._parse_tokens(self._parse_range, self.COMPARISON) 3911 3912 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3913 this = this or self._parse_bitwise() 3914 negate = self._match(TokenType.NOT) 3915 3916 if self._match_set(self.RANGE_PARSERS): 3917 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3918 if not expression: 3919 return this 3920 3921 this = expression 3922 elif self._match(TokenType.ISNULL): 3923 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3924 3925 # Postgres supports ISNULL and NOTNULL for conditions. 3926 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3927 if self._match(TokenType.NOTNULL): 3928 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3929 this = self.expression(exp.Not, this=this) 3930 3931 if negate: 3932 this = self.expression(exp.Not, this=this) 3933 3934 if self._match(TokenType.IS): 3935 this = self._parse_is(this) 3936 3937 return this 3938 3939 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3940 index = self._index - 1 3941 negate = self._match(TokenType.NOT) 3942 3943 if self._match_text_seq("DISTINCT", "FROM"): 3944 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3945 return self.expression(klass, this=this, expression=self._parse_bitwise()) 3946 3947 expression = self._parse_null() or self._parse_boolean() 3948 if not expression: 3949 self._retreat(index) 3950 return None 3951 3952 this = self.expression(exp.Is, this=this, expression=expression) 3953 return self.expression(exp.Not, this=this) if negate else this 3954 3955 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3956 unnest = self._parse_unnest(with_alias=False) 3957 if unnest: 3958 this = self.expression(exp.In, this=this, unnest=unnest) 3959 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 3960 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 3961 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3962 3963 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 3964 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 3965 else: 3966 this = self.expression(exp.In, this=this, expressions=expressions) 3967 3968 if matched_l_paren: 3969 self._match_r_paren(this) 3970 elif not self._match(TokenType.R_BRACKET, expression=this): 3971 self.raise_error("Expecting ]") 3972 else: 3973 this = self.expression(exp.In, this=this, field=self._parse_field()) 3974 3975 return this 3976 3977 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 3978 low = self._parse_bitwise() 3979 self._match(TokenType.AND) 3980 high = self._parse_bitwise() 3981 return self.expression(exp.Between, this=this, low=low, high=high) 3982 3983 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3984 if not self._match(TokenType.ESCAPE): 3985 return this 3986 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3987 3988 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]: 3989 index = self._index 3990 3991 if not self._match(TokenType.INTERVAL) and 
match_interval: 3992 return None 3993 3994 if self._match(TokenType.STRING, advance=False): 3995 this = self._parse_primary() 3996 else: 3997 this = self._parse_term() 3998 3999 if not this or ( 4000 isinstance(this, exp.Column) 4001 and not this.table 4002 and not this.this.quoted 4003 and this.name.upper() == "IS" 4004 ): 4005 self._retreat(index) 4006 return None 4007 4008 unit = self._parse_function() or ( 4009 not self._match(TokenType.ALIAS, advance=False) 4010 and self._parse_var(any_token=True, upper=True) 4011 ) 4012 4013 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4014 # each INTERVAL expression into this canonical form so it's easy to transpile 4015 if this and this.is_number: 4016 this = exp.Literal.string(this.name) 4017 elif this and this.is_string: 4018 parts = this.name.split() 4019 4020 if len(parts) == 2: 4021 if unit: 4022 # This is not actually a unit, it's something else (e.g. a "window side") 4023 unit = None 4024 self._retreat(self._index - 1) 4025 4026 this = exp.Literal.string(parts[0]) 4027 unit = self.expression(exp.Var, this=parts[1].upper()) 4028 4029 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4030 unit = self.expression( 4031 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4032 ) 4033 4034 return self.expression(exp.Interval, this=this, unit=unit) 4035 4036 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4037 this = self._parse_term() 4038 4039 while True: 4040 if self._match_set(self.BITWISE): 4041 this = self.expression( 4042 self.BITWISE[self._prev.token_type], 4043 this=this, 4044 expression=self._parse_term(), 4045 ) 4046 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4047 this = self.expression( 4048 exp.DPipe, 4049 this=this, 4050 expression=self._parse_term(), 4051 safe=not self.dialect.STRICT_STRING_CONCAT, 4052 ) 4053 elif self._match(TokenType.DQMARK): 4054 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 4055 elif self._match_pair(TokenType.LT, TokenType.LT): 4056 this = self.expression( 4057 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4058 ) 4059 elif self._match_pair(TokenType.GT, TokenType.GT): 4060 this = self.expression( 4061 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4062 ) 4063 else: 4064 break 4065 4066 return this 4067 4068 def _parse_term(self) -> t.Optional[exp.Expression]: 4069 return self._parse_tokens(self._parse_factor, self.TERM) 4070 4071 def _parse_factor(self) -> t.Optional[exp.Expression]: 4072 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4073 this = parse_method() 4074 4075 while self._match_set(self.FACTOR): 4076 this = self.expression( 4077 self.FACTOR[self._prev.token_type], 4078 this=this, 4079 comments=self._prev_comments, 4080 expression=parse_method(), 4081 ) 4082 if isinstance(this, exp.Div): 4083 this.args["typed"] = self.dialect.TYPED_DIVISION 4084 this.args["safe"] = self.dialect.SAFE_DIVISION 4085 4086 return this 4087 4088 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4089 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4090 4091 def _parse_unary(self) -> t.Optional[exp.Expression]: 4092 if self._match_set(self.UNARY_PARSERS): 4093 return self.UNARY_PARSERS[self._prev.token_type](self) 4094 return self._parse_at_time_zone(self._parse_type()) 4095 4096 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 4097 interval = parse_interval and 
self._parse_interval() 4098 if interval: 4099 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4100 while True: 4101 index = self._index 4102 self._match(TokenType.PLUS) 4103 4104 if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4105 self._retreat(index) 4106 break 4107 4108 interval = self.expression( # type: ignore 4109 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4110 ) 4111 4112 return interval 4113 4114 index = self._index 4115 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4116 this = self._parse_column() 4117 4118 if data_type: 4119 if isinstance(this, exp.Literal): 4120 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4121 if parser: 4122 return parser(self, this, data_type) 4123 return self.expression(exp.Cast, this=this, to=data_type) 4124 if not data_type.expressions: 4125 self._retreat(index) 4126 return self._parse_column() 4127 return self._parse_column_ops(data_type) 4128 4129 return this and self._parse_column_ops(this) 4130 4131 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4132 this = self._parse_type() 4133 if not this: 4134 return None 4135 4136 if isinstance(this, exp.Column) and not this.table: 4137 this = exp.var(this.name.upper()) 4138 4139 return self.expression( 4140 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4141 ) 4142 4143 def _parse_types( 4144 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4145 ) -> t.Optional[exp.Expression]: 4146 index = self._index 4147 4148 this: t.Optional[exp.Expression] = None 4149 prefix = self._match_text_seq("SYSUDTLIB", ".") 4150 4151 if not self._match_set(self.TYPE_TOKENS): 4152 identifier = allow_identifiers and self._parse_id_var( 4153 any_token=False, tokens=(TokenType.VAR,) 4154 ) 4155 if identifier: 4156 tokens = self.dialect.tokenize(identifier.name) 4157 4158 if len(tokens) != 1: 4159 self.raise_error("Unexpected identifier", self._prev) 4160 4161 if tokens[0].token_type in self.TYPE_TOKENS: 4162 self._prev = tokens[0] 4163 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4164 type_name = identifier.name 4165 4166 while self._match(TokenType.DOT): 4167 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4168 4169 this = exp.DataType.build(type_name, udt=True) 4170 else: 4171 self._retreat(self._index - 1) 4172 return None 4173 else: 4174 return None 4175 4176 type_token = self._prev.token_type 4177 4178 if type_token == TokenType.PSEUDO_TYPE: 4179 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4180 4181 if type_token == TokenType.OBJECT_IDENTIFIER: 4182 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4183 4184 nested = type_token in self.NESTED_TYPE_TOKENS 4185 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4186 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4187 expressions = None 4188 maybe_func = False 4189 4190 if self._match(TokenType.L_PAREN): 4191 if is_struct: 4192 expressions = self._parse_csv(self._parse_struct_types) 4193 elif nested: 4194 expressions = self._parse_csv( 4195 lambda: self._parse_types( 4196 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4197 ) 4198 ) 4199 elif type_token in self.ENUM_TYPE_TOKENS: 4200 expressions = self._parse_csv(self._parse_equality) 4201 elif is_aggregate: 4202 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4203 any_token=False, 
tokens=(TokenType.VAR,) 4204 ) 4205 if not func_or_ident or not self._match(TokenType.COMMA): 4206 return None 4207 expressions = self._parse_csv( 4208 lambda: self._parse_types( 4209 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4210 ) 4211 ) 4212 expressions.insert(0, func_or_ident) 4213 else: 4214 expressions = self._parse_csv(self._parse_type_size) 4215 4216 if not expressions or not self._match(TokenType.R_PAREN): 4217 self._retreat(index) 4218 return None 4219 4220 maybe_func = True 4221 4222 values: t.Optional[t.List[exp.Expression]] = None 4223 4224 if nested and self._match(TokenType.LT): 4225 if is_struct: 4226 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4227 else: 4228 expressions = self._parse_csv( 4229 lambda: self._parse_types( 4230 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4231 ) 4232 ) 4233 4234 if not self._match(TokenType.GT): 4235 self.raise_error("Expecting >") 4236 4237 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4238 values = self._parse_csv(self._parse_conjunction) 4239 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4240 4241 if type_token in self.TIMESTAMPS: 4242 if self._match_text_seq("WITH", "TIME", "ZONE"): 4243 maybe_func = False 4244 tz_type = ( 4245 exp.DataType.Type.TIMETZ 4246 if type_token in self.TIMES 4247 else exp.DataType.Type.TIMESTAMPTZ 4248 ) 4249 this = exp.DataType(this=tz_type, expressions=expressions) 4250 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4251 maybe_func = False 4252 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4253 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4254 maybe_func = False 4255 elif type_token == TokenType.INTERVAL: 4256 unit = self._parse_var(upper=True) 4257 if unit: 4258 if self._match_text_seq("TO"): 4259 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4260 4261 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4262 else: 4263 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4264 4265 if maybe_func and check_func: 4266 index2 = self._index 4267 peek = self._parse_string() 4268 4269 if not peek: 4270 self._retreat(index) 4271 return None 4272 4273 self._retreat(index2) 4274 4275 if not this: 4276 if self._match_text_seq("UNSIGNED"): 4277 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4278 if not unsigned_type_token: 4279 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4280 4281 type_token = unsigned_type_token or type_token 4282 4283 this = exp.DataType( 4284 this=exp.DataType.Type[type_token.value], 4285 expressions=expressions, 4286 nested=nested, 4287 values=values, 4288 prefix=prefix, 4289 ) 4290 elif expressions: 4291 this.set("expressions", expressions) 4292 4293 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 4294 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 4295 4296 if self.TYPE_CONVERTER and isinstance(this.this, exp.DataType.Type): 4297 converter = self.TYPE_CONVERTER.get(this.this) 4298 if converter: 4299 this = converter(t.cast(exp.DataType, this)) 4300 4301 return this 4302 4303 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4304 index = self._index 4305 this = self._parse_type(parse_interval=False) or self._parse_id_var() 4306 self._match(TokenType.COLON) 4307 column_def = 
self._parse_column_def(this) 4308 4309 if type_required and ( 4310 (isinstance(this, exp.Column) and this.this is column_def) or this is column_def 4311 ): 4312 self._retreat(index) 4313 return self._parse_types() 4314 4315 return column_def 4316 4317 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4318 if not self._match_text_seq("AT", "TIME", "ZONE"): 4319 return this 4320 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4321 4322 def _parse_column(self) -> t.Optional[exp.Expression]: 4323 this = self._parse_column_reference() 4324 return self._parse_column_ops(this) if this else self._parse_bracket(this) 4325 4326 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4327 this = self._parse_field() 4328 if ( 4329 not this 4330 and self._match(TokenType.VALUES, advance=False) 4331 and self.VALUES_FOLLOWED_BY_PAREN 4332 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4333 ): 4334 this = self._parse_id_var() 4335 4336 if isinstance(this, exp.Identifier): 4337 # We bubble up comments from the Identifier to the Column 4338 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4339 4340 return this 4341 4342 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4343 this = self._parse_bracket(this) 4344 4345 while self._match_set(self.COLUMN_OPERATORS): 4346 op_token = self._prev.token_type 4347 op = self.COLUMN_OPERATORS.get(op_token) 4348 4349 if op_token == TokenType.DCOLON: 4350 field = self._parse_types() 4351 if not field: 4352 self.raise_error("Expected type") 4353 elif op and self._curr: 4354 field = self._parse_column_reference() 4355 else: 4356 field = self._parse_field(any_token=True, anonymous_func=True) 4357 4358 if isinstance(field, exp.Func) and this: 4359 # bigquery allows function calls like x.y.count(...) 4360 # SAFE.SUBSTR(...) 
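 # when that happens, the qualified prefix (e.g. the x.y in x.y.count(...)) is folded into nested exp.Dot nodes by the replace_tree call below, so the trailing function call keeps its arguments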
4361 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4362 this = exp.replace_tree( 4363 this, 4364 lambda n: ( 4365 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4366 if n.table 4367 else n.this 4368 ) 4369 if isinstance(n, exp.Column) 4370 else n, 4371 ) 4372 4373 if op: 4374 this = op(self, this, field) 4375 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4376 this = self.expression( 4377 exp.Column, 4378 this=field, 4379 table=this.this, 4380 db=this.args.get("table"), 4381 catalog=this.args.get("db"), 4382 ) 4383 else: 4384 this = self.expression(exp.Dot, this=this, expression=field) 4385 this = self._parse_bracket(this) 4386 return this 4387 4388 def _parse_primary(self) -> t.Optional[exp.Expression]: 4389 if self._match_set(self.PRIMARY_PARSERS): 4390 token_type = self._prev.token_type 4391 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4392 4393 if token_type == TokenType.STRING: 4394 expressions = [primary] 4395 while self._match(TokenType.STRING): 4396 expressions.append(exp.Literal.string(self._prev.text)) 4397 4398 if len(expressions) > 1: 4399 return self.expression(exp.Concat, expressions=expressions) 4400 4401 return primary 4402 4403 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4404 return exp.Literal.number(f"0.{self._prev.text}") 4405 4406 if self._match(TokenType.L_PAREN): 4407 comments = self._prev_comments 4408 query = self._parse_select() 4409 4410 if query: 4411 expressions = [query] 4412 else: 4413 expressions = self._parse_expressions() 4414 4415 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4416 4417 if not this and self._match(TokenType.R_PAREN, advance=False): 4418 this = self.expression(exp.Tuple) 4419 elif isinstance(this, exp.UNWRAPPED_QUERIES): 4420 this = self._parse_subquery(this=this, parse_alias=False) 4421 elif isinstance(this, exp.Subquery): 4422 this = self._parse_subquery( 4423 this=self._parse_set_operations(this), parse_alias=False 4424 ) 4425 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 4426 this = self.expression(exp.Tuple, expressions=expressions) 4427 else: 4428 this = self.expression(exp.Paren, this=this) 4429 4430 if this: 4431 this.add_comments(comments) 4432 4433 self._match_r_paren(expression=this) 4434 return this 4435 4436 return None 4437 4438 def _parse_field( 4439 self, 4440 any_token: bool = False, 4441 tokens: t.Optional[t.Collection[TokenType]] = None, 4442 anonymous_func: bool = False, 4443 ) -> t.Optional[exp.Expression]: 4444 if anonymous_func: 4445 field = ( 4446 self._parse_function(anonymous=anonymous_func, any_token=any_token) 4447 or self._parse_primary() 4448 ) 4449 else: 4450 field = self._parse_primary() or self._parse_function( 4451 anonymous=anonymous_func, any_token=any_token 4452 ) 4453 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 4454 4455 def _parse_function( 4456 self, 4457 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4458 anonymous: bool = False, 4459 optional_parens: bool = True, 4460 any_token: bool = False, 4461 ) -> t.Optional[exp.Expression]: 4462 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4463 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4464 fn_syntax = False 4465 if ( 4466 self._match(TokenType.L_BRACE, advance=False) 4467 and self._next 4468 and self._next.text.upper() == "FN" 4469 ): 4470 self._advance(2) 4471 fn_syntax = True 4472 4473 func 
= self._parse_function_call( 4474 functions=functions, 4475 anonymous=anonymous, 4476 optional_parens=optional_parens, 4477 any_token=any_token, 4478 ) 4479 4480 if fn_syntax: 4481 self._match(TokenType.R_BRACE) 4482 4483 return func 4484 4485 def _parse_function_call( 4486 self, 4487 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4488 anonymous: bool = False, 4489 optional_parens: bool = True, 4490 any_token: bool = False, 4491 ) -> t.Optional[exp.Expression]: 4492 if not self._curr: 4493 return None 4494 4495 comments = self._curr.comments 4496 token_type = self._curr.token_type 4497 this = self._curr.text 4498 upper = this.upper() 4499 4500 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4501 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4502 self._advance() 4503 return self._parse_window(parser(self)) 4504 4505 if not self._next or self._next.token_type != TokenType.L_PAREN: 4506 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4507 self._advance() 4508 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4509 4510 return None 4511 4512 if any_token: 4513 if token_type in self.RESERVED_TOKENS: 4514 return None 4515 elif token_type not in self.FUNC_TOKENS: 4516 return None 4517 4518 self._advance(2) 4519 4520 parser = self.FUNCTION_PARSERS.get(upper) 4521 if parser and not anonymous: 4522 this = parser(self) 4523 else: 4524 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4525 4526 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4527 this = self.expression(subquery_predicate, this=self._parse_select()) 4528 self._match_r_paren() 4529 return this 4530 4531 if functions is None: 4532 functions = self.FUNCTIONS 4533 4534 function = functions.get(upper) 4535 4536 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4537 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4538 4539 if alias: 4540 args = self._kv_to_prop_eq(args) 4541 4542 if function and not anonymous: 4543 if "dialect" in function.__code__.co_varnames: 4544 func = function(args, dialect=self.dialect) 4545 else: 4546 func = function(args) 4547 4548 func = self.validate_expression(func, args) 4549 if not self.dialect.NORMALIZE_FUNCTIONS: 4550 func.meta["name"] = this 4551 4552 this = func 4553 else: 4554 if token_type == TokenType.IDENTIFIER: 4555 this = exp.Identifier(this=this, quoted=True) 4556 this = self.expression(exp.Anonymous, this=this, expressions=args) 4557 4558 if isinstance(this, exp.Expression): 4559 this.add_comments(comments) 4560 4561 self._match_r_paren(this) 4562 return self._parse_window(this) 4563 4564 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4565 transformed = [] 4566 4567 for e in expressions: 4568 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4569 if isinstance(e, exp.Alias): 4570 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4571 4572 if not isinstance(e, exp.PropertyEQ): 4573 e = self.expression( 4574 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 4575 ) 4576 4577 if isinstance(e.this, exp.Column): 4578 e.this.replace(e.this.this) 4579 4580 transformed.append(e) 4581 4582 return transformed 4583 4584 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4585 return self._parse_column_def(self._parse_id_var()) 4586 4587 def _parse_user_defined_function( 4588 self, kind: t.Optional[TokenType] = None 4589 ) -> t.Optional[exp.Expression]: 4590 this = 
self._parse_id_var() 4591 4592 while self._match(TokenType.DOT): 4593 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4594 4595 if not self._match(TokenType.L_PAREN): 4596 return this 4597 4598 expressions = self._parse_csv(self._parse_function_parameter) 4599 self._match_r_paren() 4600 return self.expression( 4601 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4602 ) 4603 4604 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4605 literal = self._parse_primary() 4606 if literal: 4607 return self.expression(exp.Introducer, this=token.text, expression=literal) 4608 4609 return self.expression(exp.Identifier, this=token.text) 4610 4611 def _parse_session_parameter(self) -> exp.SessionParameter: 4612 kind = None 4613 this = self._parse_id_var() or self._parse_primary() 4614 4615 if this and self._match(TokenType.DOT): 4616 kind = this.name 4617 this = self._parse_var() or self._parse_primary() 4618 4619 return self.expression(exp.SessionParameter, this=this, kind=kind) 4620 4621 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4622 index = self._index 4623 4624 if self._match(TokenType.L_PAREN): 4625 expressions = t.cast( 4626 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 4627 ) 4628 4629 if not self._match(TokenType.R_PAREN): 4630 self._retreat(index) 4631 else: 4632 expressions = [self._parse_id_var()] 4633 4634 if self._match_set(self.LAMBDAS): 4635 return self.LAMBDAS[self._prev.token_type](self, expressions) 4636 4637 self._retreat(index) 4638 4639 this: t.Optional[exp.Expression] 4640 4641 if self._match(TokenType.DISTINCT): 4642 this = self.expression( 4643 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 4644 ) 4645 else: 4646 this = self._parse_select_or_expression(alias=alias) 4647 4648 return self._parse_limit( 4649 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4650 ) 4651 4652 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4653 index = self._index 4654 if not self._match(TokenType.L_PAREN): 4655 return this 4656 4657 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 4658 # expr can be of both types 4659 if self._match_set(self.SELECT_START_TOKENS): 4660 self._retreat(index) 4661 return this 4662 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4663 self._match_r_paren() 4664 return self.expression(exp.Schema, this=this, expressions=args) 4665 4666 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4667 return self._parse_column_def(self._parse_field(any_token=True)) 4668 4669 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4670 # column defs are not really columns, they're identifiers 4671 if isinstance(this, exp.Column): 4672 this = this.this 4673 4674 kind = self._parse_types(schema=True) 4675 4676 if self._match_text_seq("FOR", "ORDINALITY"): 4677 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4678 4679 constraints: t.List[exp.Expression] = [] 4680 4681 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 4682 ("ALIAS", "MATERIALIZED") 4683 ): 4684 persisted = self._prev.text.upper() == "MATERIALIZED" 4685 constraints.append( 4686 self.expression( 4687 exp.ComputedColumnConstraint, 4688 this=self._parse_conjunction(), 4689 persisted=persisted or self._match_text_seq("PERSISTED"), 4690 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4691 ) 4692 ) 4693 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4694 self._match(TokenType.ALIAS) 4695 constraints.append( 4696 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4697 ) 4698 4699 while True: 4700 constraint = self._parse_column_constraint() 4701 if not constraint: 4702 break 4703 constraints.append(constraint) 4704 4705 if not kind and not constraints: 4706 return this 4707 4708 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4709 4710 def _parse_auto_increment( 4711 self, 4712 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4713 start = None 4714 increment = None 4715 4716 if self._match(TokenType.L_PAREN, advance=False): 4717 args = self._parse_wrapped_csv(self._parse_bitwise) 4718 start = seq_get(args, 0) 4719 increment = seq_get(args, 1) 4720 elif self._match_text_seq("START"): 4721 start = self._parse_bitwise() 4722 self._match_text_seq("INCREMENT") 4723 increment = self._parse_bitwise() 4724 4725 if start and increment: 4726 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4727 4728 return exp.AutoIncrementColumnConstraint() 4729 4730 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4731 if not self._match_text_seq("REFRESH"): 4732 self._retreat(self._index - 1) 4733 return None 4734 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4735 4736 def _parse_compress(self) -> exp.CompressColumnConstraint: 4737 if self._match(TokenType.L_PAREN, advance=False): 4738 return self.expression( 4739 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4740 ) 4741 4742 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4743 4744 def _parse_generated_as_identity( 4745 self, 4746 ) -> ( 4747 exp.GeneratedAsIdentityColumnConstraint 4748 | exp.ComputedColumnConstraint 4749 | exp.GeneratedAsRowColumnConstraint 4750 ): 4751 if self._match_text_seq("BY", "DEFAULT"): 4752 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4753 this = self.expression( 4754 
exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4755 ) 4756 else: 4757 self._match_text_seq("ALWAYS") 4758 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4759 4760 self._match(TokenType.ALIAS) 4761 4762 if self._match_text_seq("ROW"): 4763 start = self._match_text_seq("START") 4764 if not start: 4765 self._match(TokenType.END) 4766 hidden = self._match_text_seq("HIDDEN") 4767 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4768 4769 identity = self._match_text_seq("IDENTITY") 4770 4771 if self._match(TokenType.L_PAREN): 4772 if self._match(TokenType.START_WITH): 4773 this.set("start", self._parse_bitwise()) 4774 if self._match_text_seq("INCREMENT", "BY"): 4775 this.set("increment", self._parse_bitwise()) 4776 if self._match_text_seq("MINVALUE"): 4777 this.set("minvalue", self._parse_bitwise()) 4778 if self._match_text_seq("MAXVALUE"): 4779 this.set("maxvalue", self._parse_bitwise()) 4780 4781 if self._match_text_seq("CYCLE"): 4782 this.set("cycle", True) 4783 elif self._match_text_seq("NO", "CYCLE"): 4784 this.set("cycle", False) 4785 4786 if not identity: 4787 this.set("expression", self._parse_range()) 4788 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4789 args = self._parse_csv(self._parse_bitwise) 4790 this.set("start", seq_get(args, 0)) 4791 this.set("increment", seq_get(args, 1)) 4792 4793 self._match_r_paren() 4794 4795 return this 4796 4797 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4798 self._match_text_seq("LENGTH") 4799 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4800 4801 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 4802 if self._match_text_seq("NULL"): 4803 return self.expression(exp.NotNullColumnConstraint) 4804 if self._match_text_seq("CASESPECIFIC"): 4805 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4806 if self._match_text_seq("FOR", "REPLICATION"): 4807 return self.expression(exp.NotForReplicationColumnConstraint) 4808 return None 4809 4810 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4811 if self._match(TokenType.CONSTRAINT): 4812 this = self._parse_id_var() 4813 else: 4814 this = None 4815 4816 if self._match_texts(self.CONSTRAINT_PARSERS): 4817 return self.expression( 4818 exp.ColumnConstraint, 4819 this=this, 4820 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4821 ) 4822 4823 return this 4824 4825 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4826 if not self._match(TokenType.CONSTRAINT): 4827 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4828 4829 return self.expression( 4830 exp.Constraint, 4831 this=self._parse_id_var(), 4832 expressions=self._parse_unnamed_constraints(), 4833 ) 4834 4835 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 4836 constraints = [] 4837 while True: 4838 constraint = self._parse_unnamed_constraint() or self._parse_function() 4839 if not constraint: 4840 break 4841 constraints.append(constraint) 4842 4843 return constraints 4844 4845 def _parse_unnamed_constraint( 4846 self, constraints: t.Optional[t.Collection[str]] = None 4847 ) -> t.Optional[exp.Expression]: 4848 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4849 constraints or self.CONSTRAINT_PARSERS 4850 ): 4851 return None 4852 4853 constraint = self._prev.text.upper() 4854 if constraint not in self.CONSTRAINT_PARSERS: 4855 
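 # reachable when a keyword set passed via `constraints` (e.g. SCHEMA_UNNAMED_CONSTRAINTS) names a constraint with no parser registered in CONSTRAINT_PARSERS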
self.raise_error(f"No parser found for schema constraint {constraint}.") 4856 4857 return self.CONSTRAINT_PARSERS[constraint](self) 4858 4859 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4860 self._match_text_seq("KEY") 4861 return self.expression( 4862 exp.UniqueColumnConstraint, 4863 this=self._parse_schema(self._parse_id_var(any_token=False)), 4864 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4865 on_conflict=self._parse_on_conflict(), 4866 ) 4867 4868 def _parse_key_constraint_options(self) -> t.List[str]: 4869 options = [] 4870 while True: 4871 if not self._curr: 4872 break 4873 4874 if self._match(TokenType.ON): 4875 action = None 4876 on = self._advance_any() and self._prev.text 4877 4878 if self._match_text_seq("NO", "ACTION"): 4879 action = "NO ACTION" 4880 elif self._match_text_seq("CASCADE"): 4881 action = "CASCADE" 4882 elif self._match_text_seq("RESTRICT"): 4883 action = "RESTRICT" 4884 elif self._match_pair(TokenType.SET, TokenType.NULL): 4885 action = "SET NULL" 4886 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 4887 action = "SET DEFAULT" 4888 else: 4889 self.raise_error("Invalid key constraint") 4890 4891 options.append(f"ON {on} {action}") 4892 elif self._match_text_seq("NOT", "ENFORCED"): 4893 options.append("NOT ENFORCED") 4894 elif self._match_text_seq("DEFERRABLE"): 4895 options.append("DEFERRABLE") 4896 elif self._match_text_seq("INITIALLY", "DEFERRED"): 4897 options.append("INITIALLY DEFERRED") 4898 elif self._match_text_seq("NORELY"): 4899 options.append("NORELY") 4900 elif self._match_text_seq("MATCH", "FULL"): 4901 options.append("MATCH FULL") 4902 else: 4903 break 4904 4905 return options 4906 4907 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 4908 if match and not self._match(TokenType.REFERENCES): 4909 return None 4910 4911 expressions = None 4912 this = self._parse_table(schema=True) 4913 options = self._parse_key_constraint_options() 4914 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 4915 4916 def _parse_foreign_key(self) -> exp.ForeignKey: 4917 expressions = self._parse_wrapped_id_vars() 4918 reference = self._parse_references() 4919 options = {} 4920 4921 while self._match(TokenType.ON): 4922 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 4923 self.raise_error("Expected DELETE or UPDATE") 4924 4925 kind = self._prev.text.lower() 4926 4927 if self._match_text_seq("NO", "ACTION"): 4928 action = "NO ACTION" 4929 elif self._match(TokenType.SET): 4930 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4931 action = "SET " + self._prev.text.upper() 4932 else: 4933 self._advance() 4934 action = self._prev.text.upper() 4935 4936 options[kind] = action 4937 4938 return self.expression( 4939 exp.ForeignKey, 4940 expressions=expressions, 4941 reference=reference, 4942 **options, # type: ignore 4943 ) 4944 4945 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4946 return self._parse_field() 4947 4948 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 4949 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 4950 self._retreat(self._index - 1) 4951 return None 4952 4953 id_vars = self._parse_wrapped_id_vars() 4954 return self.expression( 4955 exp.PeriodForSystemTimeConstraint, 4956 this=seq_get(id_vars, 0), 4957 expression=seq_get(id_vars, 1), 4958 ) 4959 4960 def _parse_primary_key( 4961 self, wrapped_optional: bool = False, in_props: bool = False 4962 ) -> 
exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4963 desc = ( 4964 self._match_set((TokenType.ASC, TokenType.DESC)) 4965 and self._prev.token_type == TokenType.DESC 4966 ) 4967 4968 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4969 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4970 4971 expressions = self._parse_wrapped_csv( 4972 self._parse_primary_key_part, optional=wrapped_optional 4973 ) 4974 options = self._parse_key_constraint_options() 4975 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4976 4977 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 4978 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 4979 4980 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4981 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4982 return this 4983 4984 bracket_kind = self._prev.token_type 4985 expressions = self._parse_csv( 4986 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 4987 ) 4988 4989 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 4990 self.raise_error("Expected ]") 4991 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 4992 self.raise_error("Expected }") 4993 4994 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4995 if bracket_kind == TokenType.L_BRACE: 4996 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 4997 elif not this or this.name.upper() == "ARRAY": 4998 this = self.expression(exp.Array, expressions=expressions) 4999 else: 5000 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5001 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5002 5003 self._add_comments(this) 5004 return self._parse_bracket(this) 5005 5006 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5007 if self._match(TokenType.COLON): 5008 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 5009 return this 5010 5011 def _parse_case(self) -> t.Optional[exp.Expression]: 5012 ifs = [] 5013 default = None 5014 5015 comments = self._prev_comments 5016 expression = self._parse_conjunction() 5017 5018 while self._match(TokenType.WHEN): 5019 this = self._parse_conjunction() 5020 self._match(TokenType.THEN) 5021 then = self._parse_conjunction() 5022 ifs.append(self.expression(exp.If, this=this, true=then)) 5023 5024 if self._match(TokenType.ELSE): 5025 default = self._parse_conjunction() 5026 5027 if not self._match(TokenType.END): 5028 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5029 default = exp.column("interval") 5030 else: 5031 self.raise_error("Expected END after CASE", self._prev) 5032 5033 return self.expression( 5034 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5035 ) 5036 5037 def _parse_if(self) -> t.Optional[exp.Expression]: 5038 if self._match(TokenType.L_PAREN): 5039 args = self._parse_csv(self._parse_conjunction) 5040 this = self.validate_expression(exp.If.from_arg_list(args), args) 5041 self._match_r_paren() 5042 else: 5043 index = self._index - 1 5044 5045 if self.NO_PAREN_IF_COMMANDS and index == 0: 5046 return self._parse_as_command(self._prev) 5047 5048 condition = self._parse_conjunction() 5049 5050 if not condition: 5051 self._retreat(index) 5052 return None 5053 
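 # unparenthesized form: IF <condition> THEN <true> [ELSE <false>] END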
5054 self._match(TokenType.THEN) 5055 true = self._parse_conjunction() 5056 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 5057 self._match(TokenType.END) 5058 this = self.expression(exp.If, this=condition, true=true, false=false) 5059 5060 return this 5061 5062 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5063 if not self._match_text_seq("VALUE", "FOR"): 5064 self._retreat(self._index - 1) 5065 return None 5066 5067 return self.expression( 5068 exp.NextValueFor, 5069 this=self._parse_column(), 5070 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5071 ) 5072 5073 def _parse_extract(self) -> exp.Extract: 5074 this = self._parse_function() or self._parse_var() or self._parse_type() 5075 5076 if self._match(TokenType.FROM): 5077 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5078 5079 if not self._match(TokenType.COMMA): 5080 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5081 5082 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5083 5084 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5085 this = self._parse_conjunction() 5086 5087 if not self._match(TokenType.ALIAS): 5088 if self._match(TokenType.COMMA): 5089 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5090 5091 self.raise_error("Expected AS after CAST") 5092 5093 fmt = None 5094 to = self._parse_types() 5095 5096 if self._match(TokenType.FORMAT): 5097 fmt_string = self._parse_string() 5098 fmt = self._parse_at_time_zone(fmt_string) 5099 5100 if not to: 5101 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5102 if to.this in exp.DataType.TEMPORAL_TYPES: 5103 this = self.expression( 5104 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5105 this=this, 5106 format=exp.Literal.string( 5107 format_time( 5108 fmt_string.this if fmt_string else "", 5109 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5110 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5111 ) 5112 ), 5113 ) 5114 5115 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5116 this.set("zone", fmt.args["zone"]) 5117 return this 5118 elif not to: 5119 self.raise_error("Expected TYPE after CAST") 5120 elif isinstance(to, exp.Identifier): 5121 to = exp.DataType.build(to.name, udt=True) 5122 elif to.this == exp.DataType.Type.CHAR: 5123 if self._match(TokenType.CHARACTER_SET): 5124 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5125 5126 return self.expression( 5127 exp.Cast if strict else exp.TryCast, 5128 this=this, 5129 to=to, 5130 format=fmt, 5131 safe=safe, 5132 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5133 ) 5134 5135 def _parse_string_agg(self) -> exp.Expression: 5136 if self._match(TokenType.DISTINCT): 5137 args: t.List[t.Optional[exp.Expression]] = [ 5138 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 5139 ] 5140 if self._match(TokenType.COMMA): 5141 args.extend(self._parse_csv(self._parse_conjunction)) 5142 else: 5143 args = self._parse_csv(self._parse_conjunction) # type: ignore 5144 5145 index = self._index 5146 if not self._match(TokenType.R_PAREN) and args: 5147 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5148 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... 
]] [LIMIT n]) 5149 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5150 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5151 5152 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5153 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5154 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5155 if not self._match_text_seq("WITHIN", "GROUP"): 5156 self._retreat(index) 5157 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5158 5159 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5160 order = self._parse_order(this=seq_get(args, 0)) 5161 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5162 5163 def _parse_convert( 5164 self, strict: bool, safe: t.Optional[bool] = None 5165 ) -> t.Optional[exp.Expression]: 5166 this = self._parse_bitwise() 5167 5168 if self._match(TokenType.USING): 5169 to: t.Optional[exp.Expression] = self.expression( 5170 exp.CharacterSet, this=self._parse_var() 5171 ) 5172 elif self._match(TokenType.COMMA): 5173 to = self._parse_types() 5174 else: 5175 to = None 5176 5177 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5178 5179 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5180 """ 5181 There are generally two variants of the DECODE function: 5182 5183 - DECODE(bin, charset) 5184 - DECODE(expression, search, result [, search, result] ... [, default]) 5185 5186 The second variant will always be parsed into a CASE expression. Note that NULL 5187 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5188 instead of relying on pattern matching. 
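 For example, DECODE(x, 1, 'one', 'other') roughly becomes CASE WHEN x = 1 THEN 'one' ELSE 'other' END; for non-literal search arguments, extra IS NULL checks are added so NULLs compare as equal.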
5189 """ 5190 args = self._parse_csv(self._parse_conjunction) 5191 5192 if len(args) < 3: 5193 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5194 5195 expression, *expressions = args 5196 if not expression: 5197 return None 5198 5199 ifs = [] 5200 for search, result in zip(expressions[::2], expressions[1::2]): 5201 if not search or not result: 5202 return None 5203 5204 if isinstance(search, exp.Literal): 5205 ifs.append( 5206 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5207 ) 5208 elif isinstance(search, exp.Null): 5209 ifs.append( 5210 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5211 ) 5212 else: 5213 cond = exp.or_( 5214 exp.EQ(this=expression.copy(), expression=search), 5215 exp.and_( 5216 exp.Is(this=expression.copy(), expression=exp.Null()), 5217 exp.Is(this=search.copy(), expression=exp.Null()), 5218 copy=False, 5219 ), 5220 copy=False, 5221 ) 5222 ifs.append(exp.If(this=cond, true=result)) 5223 5224 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5225 5226 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5227 self._match_text_seq("KEY") 5228 key = self._parse_column() 5229 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5230 self._match_text_seq("VALUE") 5231 value = self._parse_bitwise() 5232 5233 if not key and not value: 5234 return None 5235 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5236 5237 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5238 if not this or not self._match_text_seq("FORMAT", "JSON"): 5239 return this 5240 5241 return self.expression(exp.FormatJson, this=this) 5242 5243 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5244 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5245 for value in values: 5246 if self._match_text_seq(value, "ON", on): 5247 return f"{value} ON {on}" 5248 5249 return None 5250 5251 @t.overload 5252 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5253 5254 @t.overload 5255 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
5256 5257 def _parse_json_object(self, agg=False): 5258 star = self._parse_star() 5259 expressions = ( 5260 [star] 5261 if star 5262 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5263 ) 5264 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5265 5266 unique_keys = None 5267 if self._match_text_seq("WITH", "UNIQUE"): 5268 unique_keys = True 5269 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5270 unique_keys = False 5271 5272 self._match_text_seq("KEYS") 5273 5274 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5275 self._parse_type() 5276 ) 5277 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5278 5279 return self.expression( 5280 exp.JSONObjectAgg if agg else exp.JSONObject, 5281 expressions=expressions, 5282 null_handling=null_handling, 5283 unique_keys=unique_keys, 5284 return_type=return_type, 5285 encoding=encoding, 5286 ) 5287 5288 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5289 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5290 if not self._match_text_seq("NESTED"): 5291 this = self._parse_id_var() 5292 kind = self._parse_types(allow_identifiers=False) 5293 nested = None 5294 else: 5295 this = None 5296 kind = None 5297 nested = True 5298 5299 path = self._match_text_seq("PATH") and self._parse_string() 5300 nested_schema = nested and self._parse_json_schema() 5301 5302 return self.expression( 5303 exp.JSONColumnDef, 5304 this=this, 5305 kind=kind, 5306 path=path, 5307 nested_schema=nested_schema, 5308 ) 5309 5310 def _parse_json_schema(self) -> exp.JSONSchema: 5311 self._match_text_seq("COLUMNS") 5312 return self.expression( 5313 exp.JSONSchema, 5314 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5315 ) 5316 5317 def _parse_json_table(self) -> exp.JSONTable: 5318 this = self._parse_format_json(self._parse_bitwise()) 5319 path = self._match(TokenType.COMMA) and self._parse_string() 5320 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5321 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5322 schema = self._parse_json_schema() 5323 5324 return exp.JSONTable( 5325 this=this, 5326 schema=schema, 5327 path=path, 5328 error_handling=error_handling, 5329 empty_handling=empty_handling, 5330 ) 5331 5332 def _parse_match_against(self) -> exp.MatchAgainst: 5333 expressions = self._parse_csv(self._parse_column) 5334 5335 self._match_text_seq(")", "AGAINST", "(") 5336 5337 this = self._parse_string() 5338 5339 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5340 modifier = "IN NATURAL LANGUAGE MODE" 5341 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5342 modifier = f"{modifier} WITH QUERY EXPANSION" 5343 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5344 modifier = "IN BOOLEAN MODE" 5345 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5346 modifier = "WITH QUERY EXPANSION" 5347 else: 5348 modifier = None 5349 5350 return self.expression( 5351 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5352 ) 5353 5354 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5355 def _parse_open_json(self) -> exp.OpenJSON: 5356 this = self._parse_bitwise() 5357 path = self._match(TokenType.COMMA) and self._parse_string() 5358 5359 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5360 this = self._parse_field(any_token=True) 5361 kind = self._parse_types() 5362 path = 
self._parse_string() 5363 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5364 5365 return self.expression( 5366 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5367 ) 5368 5369 expressions = None 5370 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5371 self._match_l_paren() 5372 expressions = self._parse_csv(_parse_open_json_column_def) 5373 5374 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5375 5376 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5377 args = self._parse_csv(self._parse_bitwise) 5378 5379 if self._match(TokenType.IN): 5380 return self.expression( 5381 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5382 ) 5383 5384 if haystack_first: 5385 haystack = seq_get(args, 0) 5386 needle = seq_get(args, 1) 5387 else: 5388 needle = seq_get(args, 0) 5389 haystack = seq_get(args, 1) 5390 5391 return self.expression( 5392 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5393 ) 5394 5395 def _parse_predict(self) -> exp.Predict: 5396 self._match_text_seq("MODEL") 5397 this = self._parse_table() 5398 5399 self._match(TokenType.COMMA) 5400 self._match_text_seq("TABLE") 5401 5402 return self.expression( 5403 exp.Predict, 5404 this=this, 5405 expression=self._parse_table(), 5406 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5407 ) 5408 5409 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5410 args = self._parse_csv(self._parse_table) 5411 return exp.JoinHint(this=func_name.upper(), expressions=args) 5412 5413 def _parse_substring(self) -> exp.Substring: 5414 # Postgres supports the form: substring(string [from int] [for int]) 5415 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5416 5417 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5418 5419 if self._match(TokenType.FROM): 5420 args.append(self._parse_bitwise()) 5421 if self._match(TokenType.FOR): 5422 if len(args) == 1: 5423 args.append(exp.Literal.number(1)) 5424 args.append(self._parse_bitwise()) 5425 5426 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5427 5428 def _parse_trim(self) -> exp.Trim: 5429 # https://www.w3resource.com/sql/character-functions/trim.php 5430 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5431 5432 position = None 5433 collation = None 5434 expression = None 5435 5436 if self._match_texts(self.TRIM_TYPES): 5437 position = self._prev.text.upper() 5438 5439 this = self._parse_bitwise() 5440 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5441 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5442 expression = self._parse_bitwise() 5443 5444 if invert_order: 5445 this, expression = expression, this 5446 5447 if self._match(TokenType.COLLATE): 5448 collation = self._parse_bitwise() 5449 5450 return self.expression( 5451 exp.Trim, this=this, position=position, expression=expression, collation=collation 5452 ) 5453 5454 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5455 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5456 5457 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5458 return self._parse_window(self._parse_id_var(), alias=True) 5459 5460 def _parse_respect_or_ignore_nulls( 5461 self, this: t.Optional[exp.Expression] 5462 ) -> t.Optional[exp.Expression]: 5463 if self._match_text_seq("IGNORE", "NULLS"): 
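 # e.g. the trailing IGNORE NULLS in FIRST_VALUE(x IGNORE NULLS)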
5464 return self.expression(exp.IgnoreNulls, this=this) 5465 if self._match_text_seq("RESPECT", "NULLS"): 5466 return self.expression(exp.RespectNulls, this=this) 5467 return this 5468 5469 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5470 if self._match(TokenType.HAVING): 5471 self._match_texts(("MAX", "MIN")) 5472 max = self._prev.text.upper() != "MIN" 5473 return self.expression( 5474 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5475 ) 5476 5477 return this 5478 5479 def _parse_window( 5480 self, this: t.Optional[exp.Expression], alias: bool = False 5481 ) -> t.Optional[exp.Expression]: 5482 func = this 5483 comments = func.comments if isinstance(func, exp.Expression) else None 5484 5485 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5486 self._match(TokenType.WHERE) 5487 this = self.expression( 5488 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5489 ) 5490 self._match_r_paren() 5491 5492 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5493 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5494 if self._match_text_seq("WITHIN", "GROUP"): 5495 order = self._parse_wrapped(self._parse_order) 5496 this = self.expression(exp.WithinGroup, this=this, expression=order) 5497 5498 # The SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER clause. 5499 # Some dialects choose to implement it and some do not. 5500 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5501 5502 # There is some code above in _parse_lambda that handles 5503 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5504 5505 # The code below handles 5506 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5507 5508 # Oracle allows both formats 5509 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5510 # and Snowflake chose to do the same for familiarity 5511 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5512 if isinstance(this, exp.AggFunc): 5513 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5514 5515 if ignore_respect and ignore_respect is not this: 5516 ignore_respect.replace(ignore_respect.this) 5517 this = self.expression(ignore_respect.__class__, this=this) 5518 5519 this = self._parse_respect_or_ignore_nulls(this) 5520 5521 # bigquery select from window x AS (partition by ...)
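 # alias=True means we are parsing a named window (see _parse_named_window), e.g. the `w AS (PARTITION BY x)` item of a WINDOW clause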
5522 if alias: 5523 over = None 5524 self._match(TokenType.ALIAS) 5525 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5526 return this 5527 else: 5528 over = self._prev.text.upper() 5529 5530 if comments and isinstance(func, exp.Expression): 5531 func.pop_comments() 5532 5533 if not self._match(TokenType.L_PAREN): 5534 return self.expression( 5535 exp.Window, 5536 comments=comments, 5537 this=this, 5538 alias=self._parse_id_var(False), 5539 over=over, 5540 ) 5541 5542 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5543 5544 first = self._match(TokenType.FIRST) 5545 if self._match_text_seq("LAST"): 5546 first = False 5547 5548 partition, order = self._parse_partition_and_order() 5549 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5550 5551 if kind: 5552 self._match(TokenType.BETWEEN) 5553 start = self._parse_window_spec() 5554 self._match(TokenType.AND) 5555 end = self._parse_window_spec() 5556 5557 spec = self.expression( 5558 exp.WindowSpec, 5559 kind=kind, 5560 start=start["value"], 5561 start_side=start["side"], 5562 end=end["value"], 5563 end_side=end["side"], 5564 ) 5565 else: 5566 spec = None 5567 5568 self._match_r_paren() 5569 5570 window = self.expression( 5571 exp.Window, 5572 comments=comments, 5573 this=this, 5574 partition_by=partition, 5575 order=order, 5576 spec=spec, 5577 alias=window_alias, 5578 over=over, 5579 first=first, 5580 ) 5581 5582 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5583 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5584 return self._parse_window(window, alias=alias) 5585 5586 return window 5587 5588 def _parse_partition_and_order( 5589 self, 5590 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5591 return self._parse_partition_by(), self._parse_order() 5592 5593 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5594 self._match(TokenType.BETWEEN) 5595 5596 return { 5597 "value": ( 5598 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5599 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5600 or self._parse_bitwise() 5601 ), 5602 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5603 } 5604 5605 def _parse_alias( 5606 self, this: t.Optional[exp.Expression], explicit: bool = False 5607 ) -> t.Optional[exp.Expression]: 5608 any_token = self._match(TokenType.ALIAS) 5609 comments = self._prev_comments or [] 5610 5611 if explicit and not any_token: 5612 return this 5613 5614 if self._match(TokenType.L_PAREN): 5615 aliases = self.expression( 5616 exp.Aliases, 5617 comments=comments, 5618 this=this, 5619 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5620 ) 5621 self._match_r_paren(aliases) 5622 return aliases 5623 5624 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 5625 self.STRING_ALIASES and self._parse_string_as_identifier() 5626 ) 5627 5628 if alias: 5629 comments.extend(alias.pop_comments()) 5630 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5631 column = this.this 5632 5633 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5634 if not this.comments and column and column.comments: 5635 this.comments = column.pop_comments() 5636 5637 return this 5638 5639 def _parse_id_var( 5640 self, 5641 any_token: bool = True, 5642 tokens: t.Optional[t.Collection[TokenType]] = None, 5643 ) -> t.Optional[exp.Expression]: 5644 expression = self._parse_identifier() 5645 if 
not expression and ( 5646 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 5647 ): 5648 quoted = self._prev.token_type == TokenType.STRING 5649 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 5650 5651 return expression 5652 5653 def _parse_string(self) -> t.Optional[exp.Expression]: 5654 if self._match_set(self.STRING_PARSERS): 5655 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5656 return self._parse_placeholder() 5657 5658 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5659 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5660 5661 def _parse_number(self) -> t.Optional[exp.Expression]: 5662 if self._match_set(self.NUMERIC_PARSERS): 5663 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5664 return self._parse_placeholder() 5665 5666 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5667 if self._match(TokenType.IDENTIFIER): 5668 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5669 return self._parse_placeholder() 5670 5671 def _parse_var( 5672 self, 5673 any_token: bool = False, 5674 tokens: t.Optional[t.Collection[TokenType]] = None, 5675 upper: bool = False, 5676 ) -> t.Optional[exp.Expression]: 5677 if ( 5678 (any_token and self._advance_any()) 5679 or self._match(TokenType.VAR) 5680 or (self._match_set(tokens) if tokens else False) 5681 ): 5682 return self.expression( 5683 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5684 ) 5685 return self._parse_placeholder() 5686 5687 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5688 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5689 self._advance() 5690 return self._prev 5691 return None 5692 5693 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5694 return self._parse_var() or self._parse_string() 5695 5696 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5697 return self._parse_primary() or self._parse_var(any_token=True) 5698 5699 def _parse_null(self) -> t.Optional[exp.Expression]: 5700 if self._match_set(self.NULL_TOKENS): 5701 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5702 return self._parse_placeholder() 5703 5704 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5705 if self._match(TokenType.TRUE): 5706 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5707 if self._match(TokenType.FALSE): 5708 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5709 return self._parse_placeholder() 5710 5711 def _parse_star(self) -> t.Optional[exp.Expression]: 5712 if self._match(TokenType.STAR): 5713 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5714 return self._parse_placeholder() 5715 5716 def _parse_parameter(self) -> exp.Parameter: 5717 this = self._parse_identifier() or self._parse_primary_or_var() 5718 return self.expression(exp.Parameter, this=this) 5719 5720 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5721 if self._match_set(self.PLACEHOLDER_PARSERS): 5722 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5723 if placeholder: 5724 return placeholder 5725 self._advance(-1) 5726 return None 5727 5728 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 5729 if not self._match_texts(keywords): 5730 return None 5731 if self._match(TokenType.L_PAREN, advance=False): 5732 return 
self._parse_wrapped_csv(self._parse_expression) 5733 5734 expression = self._parse_expression() 5735 return [expression] if expression else None 5736 5737 def _parse_csv( 5738 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5739 ) -> t.List[exp.Expression]: 5740 parse_result = parse_method() 5741 items = [parse_result] if parse_result is not None else [] 5742 5743 while self._match(sep): 5744 self._add_comments(parse_result) 5745 parse_result = parse_method() 5746 if parse_result is not None: 5747 items.append(parse_result) 5748 5749 return items 5750 5751 def _parse_tokens( 5752 self, parse_method: t.Callable, expressions: t.Dict 5753 ) -> t.Optional[exp.Expression]: 5754 this = parse_method() 5755 5756 while self._match_set(expressions): 5757 this = self.expression( 5758 expressions[self._prev.token_type], 5759 this=this, 5760 comments=self._prev_comments, 5761 expression=parse_method(), 5762 ) 5763 5764 return this 5765 5766 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5767 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5768 5769 def _parse_wrapped_csv( 5770 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5771 ) -> t.List[exp.Expression]: 5772 return self._parse_wrapped( 5773 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5774 ) 5775 5776 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5777 wrapped = self._match(TokenType.L_PAREN) 5778 if not wrapped and not optional: 5779 self.raise_error("Expecting (") 5780 parse_result = parse_method() 5781 if wrapped: 5782 self._match_r_paren() 5783 return parse_result 5784 5785 def _parse_expressions(self) -> t.List[exp.Expression]: 5786 return self._parse_csv(self._parse_expression) 5787 5788 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5789 return self._parse_select() or self._parse_set_operations( 5790 self._parse_expression() if alias else self._parse_conjunction() 5791 ) 5792 5793 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5794 return self._parse_query_modifiers( 5795 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5796 ) 5797 5798 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5799 this = None 5800 if self._match_texts(self.TRANSACTION_KIND): 5801 this = self._prev.text 5802 5803 self._match_texts(("TRANSACTION", "WORK")) 5804 5805 modes = [] 5806 while True: 5807 mode = [] 5808 while self._match(TokenType.VAR): 5809 mode.append(self._prev.text) 5810 5811 if mode: 5812 modes.append(" ".join(mode)) 5813 if not self._match(TokenType.COMMA): 5814 break 5815 5816 return self.expression(exp.Transaction, this=this, modes=modes) 5817 5818 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5819 chain = None 5820 savepoint = None 5821 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5822 5823 self._match_texts(("TRANSACTION", "WORK")) 5824 5825 if self._match_text_seq("TO"): 5826 self._match_text_seq("SAVEPOINT") 5827 savepoint = self._parse_id_var() 5828 5829 if self._match(TokenType.AND): 5830 chain = not self._match_text_seq("NO") 5831 self._match_text_seq("CHAIN") 5832 5833 if is_rollback: 5834 return self.expression(exp.Rollback, savepoint=savepoint) 5835 5836 return self.expression(exp.Commit, chain=chain) 5837 5838 def _parse_refresh(self) -> exp.Refresh: 5839 self._match(TokenType.TABLE) 5840 return self.expression(exp.Refresh, 
this=self._parse_string() or self._parse_table()) 5841 5842 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5843 if not self._match_text_seq("ADD"): 5844 return None 5845 5846 self._match(TokenType.COLUMN) 5847 exists_column = self._parse_exists(not_=True) 5848 expression = self._parse_field_def() 5849 5850 if expression: 5851 expression.set("exists", exists_column) 5852 5853 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5854 if self._match_texts(("FIRST", "AFTER")): 5855 position = self._prev.text 5856 column_position = self.expression( 5857 exp.ColumnPosition, this=self._parse_column(), position=position 5858 ) 5859 expression.set("position", column_position) 5860 5861 return expression 5862 5863 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5864 drop = self._match(TokenType.DROP) and self._parse_drop() 5865 if drop and not isinstance(drop, exp.Command): 5866 drop.set("kind", drop.args.get("kind", "COLUMN")) 5867 return drop 5868 5869 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5870 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5871 return self.expression( 5872 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5873 ) 5874 5875 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5876 index = self._index - 1 5877 5878 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 5879 return self._parse_csv( 5880 lambda: self.expression( 5881 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 5882 ) 5883 ) 5884 5885 self._retreat(index) 5886 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5887 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5888 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5889 5890 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 5891 if self._match_texts(self.ALTER_ALTER_PARSERS): 5892 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 5893 5894 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 5895 # keyword after ALTER we default to parsing this statement 5896 self._match(TokenType.COLUMN) 5897 column = self._parse_field(any_token=True) 5898 5899 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5900 return self.expression(exp.AlterColumn, this=column, drop=True) 5901 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5902 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 5903 if self._match(TokenType.COMMENT): 5904 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 5905 5906 self._match_text_seq("SET", "DATA") 5907 self._match_text_seq("TYPE") 5908 return self.expression( 5909 exp.AlterColumn, 5910 this=column, 5911 dtype=self._parse_types(), 5912 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5913 using=self._match(TokenType.USING) and self._parse_conjunction(), 5914 ) 5915 5916 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 5917 if self._match_texts(("ALL", "EVEN", "AUTO")): 5918 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 5919 5920 self._match_text_seq("KEY", "DISTKEY") 5921 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 5922 5923 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 5924 if compound: 5925 
self._match_text_seq("SORTKEY") 5926 5927 if self._match(TokenType.L_PAREN, advance=False): 5928 return self.expression( 5929 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 5930 ) 5931 5932 self._match_texts(("AUTO", "NONE")) 5933 return self.expression( 5934 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 5935 ) 5936 5937 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5938 index = self._index - 1 5939 5940 partition_exists = self._parse_exists() 5941 if self._match(TokenType.PARTITION, advance=False): 5942 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 5943 5944 self._retreat(index) 5945 return self._parse_csv(self._parse_drop_column) 5946 5947 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 5948 if self._match(TokenType.COLUMN): 5949 exists = self._parse_exists() 5950 old_column = self._parse_column() 5951 to = self._match_text_seq("TO") 5952 new_column = self._parse_column() 5953 5954 if old_column is None or to is None or new_column is None: 5955 return None 5956 5957 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 5958 5959 self._match_text_seq("TO") 5960 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5961 5962 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5963 start = self._prev 5964 5965 if not self._match(TokenType.TABLE): 5966 return self._parse_as_command(start) 5967 5968 exists = self._parse_exists() 5969 only = self._match_text_seq("ONLY") 5970 this = self._parse_table(schema=True) 5971 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 5972 5973 if self._next: 5974 self._advance() 5975 5976 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5977 if parser: 5978 actions = ensure_list(parser(self)) 5979 options = self._parse_csv(self._parse_property) 5980 5981 if not self._curr and actions: 5982 return self.expression( 5983 exp.AlterTable, 5984 this=this, 5985 exists=exists, 5986 actions=actions, 5987 only=only, 5988 options=options, 5989 cluster=cluster, 5990 ) 5991 5992 return self._parse_as_command(start) 5993 5994 def _parse_merge(self) -> exp.Merge: 5995 self._match(TokenType.INTO) 5996 target = self._parse_table() 5997 5998 if target and self._match(TokenType.ALIAS, advance=False): 5999 target.set("alias", self._parse_table_alias()) 6000 6001 self._match(TokenType.USING) 6002 using = self._parse_table() 6003 6004 self._match(TokenType.ON) 6005 on = self._parse_conjunction() 6006 6007 return self.expression( 6008 exp.Merge, 6009 this=target, 6010 using=using, 6011 on=on, 6012 expressions=self._parse_when_matched(), 6013 ) 6014 6015 def _parse_when_matched(self) -> t.List[exp.When]: 6016 whens = [] 6017 6018 while self._match(TokenType.WHEN): 6019 matched = not self._match(TokenType.NOT) 6020 self._match_text_seq("MATCHED") 6021 source = ( 6022 False 6023 if self._match_text_seq("BY", "TARGET") 6024 else self._match_text_seq("BY", "SOURCE") 6025 ) 6026 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 6027 6028 self._match(TokenType.THEN) 6029 6030 if self._match(TokenType.INSERT): 6031 _this = self._parse_star() 6032 if _this: 6033 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 6034 else: 6035 then = self.expression( 6036 exp.Insert, 6037 this=self._parse_value(), 6038 expression=self._match_text_seq("VALUES") and self._parse_value(), 6039 ) 
6040 elif self._match(TokenType.UPDATE): 6041 expressions = self._parse_star() 6042 if expressions: 6043 then = self.expression(exp.Update, expressions=expressions) 6044 else: 6045 then = self.expression( 6046 exp.Update, 6047 expressions=self._match(TokenType.SET) 6048 and self._parse_csv(self._parse_equality), 6049 ) 6050 elif self._match(TokenType.DELETE): 6051 then = self.expression(exp.Var, this=self._prev.text) 6052 else: 6053 then = None 6054 6055 whens.append( 6056 self.expression( 6057 exp.When, 6058 matched=matched, 6059 source=source, 6060 condition=condition, 6061 then=then, 6062 ) 6063 ) 6064 return whens 6065 6066 def _parse_show(self) -> t.Optional[exp.Expression]: 6067 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6068 if parser: 6069 return parser(self) 6070 return self._parse_as_command(self._prev) 6071 6072 def _parse_set_item_assignment( 6073 self, kind: t.Optional[str] = None 6074 ) -> t.Optional[exp.Expression]: 6075 index = self._index 6076 6077 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6078 return self._parse_set_transaction(global_=kind == "GLOBAL") 6079 6080 left = self._parse_primary() or self._parse_column() 6081 assignment_delimiter = self._match_texts(("=", "TO")) 6082 6083 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6084 self._retreat(index) 6085 return None 6086 6087 right = self._parse_statement() or self._parse_id_var() 6088 this = self.expression(exp.EQ, this=left, expression=right) 6089 6090 return self.expression(exp.SetItem, this=this, kind=kind) 6091 6092 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6093 self._match_text_seq("TRANSACTION") 6094 characteristics = self._parse_csv( 6095 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6096 ) 6097 return self.expression( 6098 exp.SetItem, 6099 expressions=characteristics, 6100 kind="TRANSACTION", 6101 **{"global": global_}, # type: ignore 6102 ) 6103 6104 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6105 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6106 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6107 6108 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6109 index = self._index 6110 set_ = self.expression( 6111 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6112 ) 6113 6114 if self._curr: 6115 self._retreat(index) 6116 return self._parse_as_command(self._prev) 6117 6118 return set_ 6119 6120 def _parse_var_from_options( 6121 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6122 ) -> t.Optional[exp.Var]: 6123 start = self._curr 6124 if not start: 6125 return None 6126 6127 option = start.text.upper() 6128 continuations = options.get(option) 6129 6130 index = self._index 6131 self._advance() 6132 for keywords in continuations or []: 6133 if isinstance(keywords, str): 6134 keywords = (keywords,) 6135 6136 if self._match_text_seq(*keywords): 6137 option = f"{option} {' '.join(keywords)}" 6138 break 6139 else: 6140 if continuations or continuations is None: 6141 if raise_unmatched: 6142 self.raise_error(f"Unknown option {option}") 6143 6144 self._retreat(index) 6145 return None 6146 6147 return exp.var(option) 6148 6149 def _parse_as_command(self, start: Token) -> exp.Command: 6150 while self._curr: 6151 self._advance() 6152 text = self._find_sql(start, self._prev) 6153 size = len(start.text) 6154 self._warn_unsupported() 6155 
return exp.Command(this=text[:size], expression=text[size:]) 6156 6157 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6158 settings = [] 6159 6160 self._match_l_paren() 6161 kind = self._parse_id_var() 6162 6163 if self._match(TokenType.L_PAREN): 6164 while True: 6165 key = self._parse_id_var() 6166 value = self._parse_primary() 6167 6168 if not key and value is None: 6169 break 6170 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6171 self._match(TokenType.R_PAREN) 6172 6173 self._match_r_paren() 6174 6175 return self.expression( 6176 exp.DictProperty, 6177 this=this, 6178 kind=kind.this if kind else None, 6179 settings=settings, 6180 ) 6181 6182 def _parse_dict_range(self, this: str) -> exp.DictRange: 6183 self._match_l_paren() 6184 has_min = self._match_text_seq("MIN") 6185 if has_min: 6186 min = self._parse_var() or self._parse_primary() 6187 self._match_text_seq("MAX") 6188 max = self._parse_var() or self._parse_primary() 6189 else: 6190 max = self._parse_var() or self._parse_primary() 6191 min = exp.Literal.number(0) 6192 self._match_r_paren() 6193 return self.expression(exp.DictRange, this=this, min=min, max=max) 6194 6195 def _parse_comprehension( 6196 self, this: t.Optional[exp.Expression] 6197 ) -> t.Optional[exp.Comprehension]: 6198 index = self._index 6199 expression = self._parse_column() 6200 if not self._match(TokenType.IN): 6201 self._retreat(index - 1) 6202 return None 6203 iterator = self._parse_column() 6204 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 6205 return self.expression( 6206 exp.Comprehension, 6207 this=this, 6208 expression=expression, 6209 iterator=iterator, 6210 condition=condition, 6211 ) 6212 6213 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6214 if self._match(TokenType.HEREDOC_STRING): 6215 return self.expression(exp.Heredoc, this=self._prev.text) 6216 6217 if not self._match_text_seq("$"): 6218 return None 6219 6220 tags = ["$"] 6221 tag_text = None 6222 6223 if self._is_connected(): 6224 self._advance() 6225 tags.append(self._prev.text.upper()) 6226 else: 6227 self.raise_error("No closing $ found") 6228 6229 if tags[-1] != "$": 6230 if self._is_connected() and self._match_text_seq("$"): 6231 tag_text = tags[-1] 6232 tags.append("$") 6233 else: 6234 self.raise_error("No closing $ found") 6235 6236 heredoc_start = self._curr 6237 6238 while self._curr: 6239 if self._match_text_seq(*tags, advance=False): 6240 this = self._find_sql(heredoc_start, self._prev) 6241 self._advance(len(tags)) 6242 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6243 6244 self._advance() 6245 6246 self.raise_error(f"No closing {''.join(tags)} found") 6247 return None 6248 6249 def _find_parser( 6250 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6251 ) -> t.Optional[t.Callable]: 6252 if not self._curr: 6253 return None 6254 6255 index = self._index 6256 this = [] 6257 while True: 6258 # The current token might be multiple words 6259 curr = self._curr.text.upper() 6260 key = curr.split(" ") 6261 this.append(curr) 6262 6263 self._advance() 6264 result, trie = in_trie(trie, key) 6265 if result == TrieResult.FAILED: 6266 break 6267 6268 if result == TrieResult.EXISTS: 6269 subparser = parsers[" ".join(this)] 6270 return subparser 6271 6272 self._retreat(index) 6273 return None 6274 6275 def _match(self, token_type, advance=True, expression=None): 6276 if not self._curr: 6277 return None 6278 6279 if self._curr.token_type == token_type: 6280 if advance: 6281 self._advance() 6282 
self._add_comments(expression) 6283 return True 6284 6285 return None 6286 6287 def _match_set(self, types, advance=True): 6288 if not self._curr: 6289 return None 6290 6291 if self._curr.token_type in types: 6292 if advance: 6293 self._advance() 6294 return True 6295 6296 return None 6297 6298 def _match_pair(self, token_type_a, token_type_b, advance=True): 6299 if not self._curr or not self._next: 6300 return None 6301 6302 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6303 if advance: 6304 self._advance(2) 6305 return True 6306 6307 return None 6308 6309 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6310 if not self._match(TokenType.L_PAREN, expression=expression): 6311 self.raise_error("Expecting (") 6312 6313 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6314 if not self._match(TokenType.R_PAREN, expression=expression): 6315 self.raise_error("Expecting )") 6316 6317 def _match_texts(self, texts, advance=True): 6318 if self._curr and self._curr.text.upper() in texts: 6319 if advance: 6320 self._advance() 6321 return True 6322 return None 6323 6324 def _match_text_seq(self, *texts, advance=True): 6325 index = self._index 6326 for text in texts: 6327 if self._curr and self._curr.text.upper() == text: 6328 self._advance() 6329 else: 6330 self._retreat(index) 6331 return None 6332 6333 if not advance: 6334 self._retreat(index) 6335 6336 return True 6337 6338 def _replace_lambda( 6339 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 6340 ) -> t.Optional[exp.Expression]: 6341 if not node: 6342 return node 6343 6344 for column in node.find_all(exp.Column): 6345 if column.parts[0].name in lambda_variables: 6346 dot_or_id = column.to_dot() if column.table else column.this 6347 parent = column.parent 6348 6349 while isinstance(parent, exp.Dot): 6350 if not isinstance(parent.parent, exp.Dot): 6351 parent.replace(dot_or_id) 6352 break 6353 parent = parent.parent 6354 else: 6355 if column is node: 6356 node = dot_or_id 6357 else: 6358 column.replace(dot_or_id) 6359 return node 6360 6361 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6362 start = self._prev 6363 6364 # Not to be confused with TRUNCATE(number, decimals) function call 6365 if self._match(TokenType.L_PAREN): 6366 self._retreat(self._index - 2) 6367 return self._parse_function() 6368 6369 # Clickhouse supports TRUNCATE DATABASE as well 6370 is_database = self._match(TokenType.DATABASE) 6371 6372 self._match(TokenType.TABLE) 6373 6374 exists = self._parse_exists(not_=False) 6375 6376 expressions = self._parse_csv( 6377 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6378 ) 6379 6380 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6381 6382 if self._match_text_seq("RESTART", "IDENTITY"): 6383 identity = "RESTART" 6384 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6385 identity = "CONTINUE" 6386 else: 6387 identity = None 6388 6389 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6390 option = self._prev.text 6391 else: 6392 option = None 6393 6394 partition = self._parse_partition() 6395 6396 # Fallback case 6397 if self._curr: 6398 return self._parse_as_command(start) 6399 6400 return self.expression( 6401 exp.TruncateTable, 6402 expressions=expressions, 6403 is_database=is_database, 6404 exists=exists, 6405 cluster=cluster, 6406 identity=identity, 6407 option=option, 6408 partition=partition, 6409 ) 
6410 6411 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6412 this = self._parse_ordered(self._parse_opclass) 6413 6414 if not self._match(TokenType.WITH): 6415 return this 6416 6417 op = self._parse_var(any_token=True) 6418 6419 return self.expression(exp.WithOperator, this=this, op=op) 6420 6421 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 6422 opts = [] 6423 self._match(TokenType.EQ) 6424 self._match(TokenType.L_PAREN) 6425 while self._curr and not self._match(TokenType.R_PAREN): 6426 opts.append(self._parse_conjunction()) 6427 self._match(TokenType.COMMA) 6428 return opts 6429 6430 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 6431 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 6432 6433 options = [] 6434 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 6435 option = self._parse_unquoted_field() 6436 value = None 6437 6438 # Some options are defined as functions with the values as params 6439 if not isinstance(option, exp.Func): 6440 prev = self._prev.text.upper() 6441 # Different dialects might separate options and values by white space, "=" and "AS" 6442 self._match(TokenType.EQ) 6443 self._match(TokenType.ALIAS) 6444 6445 if prev == "FILE_FORMAT" and self._match(TokenType.L_PAREN): 6446 # Snowflake FILE_FORMAT case 6447 value = self._parse_wrapped_options() 6448 else: 6449 value = self._parse_unquoted_field() 6450 6451 param = self.expression(exp.CopyParameter, this=option, expression=value) 6452 options.append(param) 6453 6454 if sep: 6455 self._match(sep) 6456 6457 return options 6458 6459 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 6460 expr = self.expression(exp.Credentials) 6461 6462 if self._match_text_seq("STORAGE_INTEGRATION", advance=False): 6463 expr.set("storage", self._parse_conjunction()) 6464 if self._match_text_seq("CREDENTIALS"): 6465 # Snowflake supports CREDENTIALS = (...), while Redshift CREDENTIALS <string> 6466 creds = ( 6467 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 6468 ) 6469 expr.set("credentials", creds) 6470 if self._match_text_seq("ENCRYPTION"): 6471 expr.set("encryption", self._parse_wrapped_options()) 6472 if self._match_text_seq("IAM_ROLE"): 6473 expr.set("iam_role", self._parse_field()) 6474 if self._match_text_seq("REGION"): 6475 expr.set("region", self._parse_field()) 6476 6477 return expr 6478 6479 def _parse_file_location(self) -> t.Optional[exp.Expression]: 6480 return self._parse_field() 6481 6482 def _parse_copy(self) -> exp.Copy | exp.Command: 6483 start = self._prev 6484 6485 self._match(TokenType.INTO) 6486 6487 this = ( 6488 self._parse_conjunction() 6489 if self._match(TokenType.L_PAREN, advance=False) 6490 else self._parse_table(schema=True) 6491 ) 6492 6493 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 6494 6495 files = self._parse_csv(self._parse_file_location) 6496 credentials = self._parse_credentials() 6497 6498 self._match_text_seq("WITH") 6499 6500 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 6501 6502 # Fallback case 6503 if self._curr: 6504 return self._parse_as_command(start) 6505 6506 return self.expression( 6507 exp.Copy, 6508 this=this, 6509 kind=kind, 6510 credentials=credentials, 6511 files=files, 6512 params=params, 6513 )
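The window, MERGE, SET, TRUNCATE and COPY machinery above is reached through the public parsing entry points rather than called directly. As a minimal sketch of the window path (using only the public sqlglot API; the query text is illustrative):

import sqlglot
from sqlglot import exp

ast = sqlglot.parse_one(
    "SELECT SUM(x) OVER (PARTITION BY y ORDER BY z ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM t"
)
window = ast.find(exp.Window)
print(window.args["partition_by"])  # collected by _parse_partition_and_order
print(window.args["spec"])          # exp.WindowSpec built from the _parse_window_spec bounds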
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
1193 def __init__( 1194 self, 1195 error_level: t.Optional[ErrorLevel] = None, 1196 error_message_context: int = 100, 1197 max_errors: int = 3, 1198 dialect: DialectType = None, 1199 ): 1200 from sqlglot.dialects import Dialect 1201 1202 self.error_level = error_level or ErrorLevel.IMMEDIATE 1203 self.error_message_context = error_message_context 1204 self.max_errors = max_errors 1205 self.dialect = Dialect.get_or_raise(dialect) 1206 self.reset()
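A minimal construction sketch (the dialect name and the settings are illustrative; Dialect.get_or_raise accepts both dialect names and Dialect instances):

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

parser = Parser(
    error_level=ErrorLevel.WARN,  # log and record errors instead of raising immediately
    error_message_context=50,     # characters of surrounding SQL shown in error messages
    max_errors=5,                 # only consulted when error_level is ErrorLevel.RAISE
    dialect="duckdb",             # resolved through Dialect.get_or_raise
)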
1218 def parse( 1219 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1220 ) -> t.List[t.Optional[exp.Expression]]: 1221 """ 1222 Parses a list of tokens and returns a list of syntax trees, one tree 1223 per parsed SQL statement. 1224 1225 Args: 1226 raw_tokens: The list of tokens. 1227 sql: The original SQL string, used to produce helpful debug messages. 1228 1229 Returns: 1230 The list of the produced syntax trees. 1231 """ 1232 return self._parse( 1233 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1234 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
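For example, feeding parse the tokens produced by the dialect's tokenizer (a sketch; this is essentially what the higher-level sqlglot.parse helper does):

from sqlglot.parser import Parser

parser = Parser()
sql = "SELECT 1; SELECT 2"
trees = parser.parse(parser.dialect.tokenize(sql), sql)
assert len(trees) == 2  # one syntax tree per semicolon-separated statement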
1236 def parse_into( 1237 self, 1238 expression_types: exp.IntoType, 1239 raw_tokens: t.List[Token], 1240 sql: t.Optional[str] = None, 1241 ) -> t.List[t.Optional[exp.Expression]]: 1242 """ 1243 Parses a list of tokens into a given Expression type. If a collection of Expression 1244 types is given instead, this method will try to parse the token list into each one 1245 of them, stopping at the first for which the parsing succeeds. 1246 1247 Args: 1248 expression_types: The expression type(s) to try and parse the token list into. 1249 raw_tokens: The list of tokens. 1250 sql: The original SQL string, used to produce helpful debug messages. 1251 1252 Returns: 1253 The target Expression. 1254 """ 1255 errors = [] 1256 for expression_type in ensure_list(expression_types): 1257 parser = self.EXPRESSION_PARSERS.get(expression_type) 1258 if not parser: 1259 raise TypeError(f"No parser registered for {expression_type}") 1260 1261 try: 1262 return self._parse(parser, raw_tokens, sql) 1263 except ParseError as e: 1264 e.errors[0]["into_expression"] = expression_type 1265 errors.append(e) 1266 1267 raise ParseError( 1268 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1269 errors=merge_errors(errors), 1270 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
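A sketch targeting a single Expression type (this assumes exp.Select is registered in EXPRESSION_PARSERS, as it is for the base parser):

from sqlglot import exp
from sqlglot.parser import Parser

parser = Parser()
sql = "SELECT a FROM t"
(select,) = parser.parse_into(exp.Select, parser.dialect.tokenize(sql), sql)
assert isinstance(select, exp.Select)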
1310 def check_errors(self) -> None: 1311 """Logs or raises any found errors, depending on the chosen error level setting.""" 1312 if self.error_level == ErrorLevel.WARN: 1313 for error in self.errors: 1314 logger.error(str(error)) 1315 elif self.error_level == ErrorLevel.RAISE and self.errors: 1316 raise ParseError( 1317 concat_messages(self.errors, self.max_errors), 1318 errors=merge_errors(self.errors), 1319 )
Logs or raises any found errors, depending on the chosen error level setting.
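Under ErrorLevel.WARN, for instance, a failed parse is logged and left on the parser instead of raising (a sketch; it assumes the snippet fails to parse cleanly):

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

parser = Parser(error_level=ErrorLevel.WARN)
parser.parse(parser.dialect.tokenize("SELECT 1 +"), "SELECT 1 +")
print(parser.errors)  # the recorded ParseError objects, logged by check_errors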
1321 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1322 """ 1323 Appends an error in the list of recorded errors or raises it, depending on the chosen 1324 error level setting. 1325 """ 1326 token = token or self._curr or self._prev or Token.string("") 1327 start = token.start 1328 end = token.end + 1 1329 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1330 highlight = self.sql[start:end] 1331 end_context = self.sql[end : end + self.error_message_context] 1332 1333 error = ParseError.new( 1334 f"{message}. Line {token.line}, Col: {token.col}.\n" 1335 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1336 description=message, 1337 line=token.line, 1338 col=token.col, 1339 start_context=start_context, 1340 highlight=highlight, 1341 end_context=end_context, 1342 ) 1343 1344 if self.error_level == ErrorLevel.IMMEDIATE: 1345 raise error 1346 1347 self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
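With the default ErrorLevel.IMMEDIATE the error is raised on the spot, carrying the context fields described above (a sketch; it assumes the snippet fails to parse):

from sqlglot.errors import ParseError
from sqlglot.parser import Parser

parser = Parser()  # ErrorLevel.IMMEDIATE by default
sql = "SELECT 1 +"
try:
    parser.parse(parser.dialect.tokenize(sql), sql)
except ParseError as e:
    err = e.errors[0]
    print(err["line"], err["col"], err["description"], err["highlight"])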
1349 def expression( 1350 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1351 ) -> E: 1352 """ 1353 Creates a new, validated Expression. 1354 1355 Args: 1356 exp_class: The expression class to instantiate. 1357 comments: An optional list of comments to attach to the expression. 1358 kwargs: The arguments to set for the expression along with their respective values. 1359 1360 Returns: 1361 The target expression. 1362 """ 1363 instance = exp_class(**kwargs) 1364 instance.add_comments(comments) if comments else self._add_comments(instance) 1365 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
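This is the hook dialect parsers go through when building nodes, which is how comments attached to the most recently consumed token end up on the new expression. A hypothetical subclass method (MY_FUNC and _parse_my_func are illustrative names, not part of sqlglot):

from sqlglot import exp
from sqlglot.parser import Parser

class MyParser(Parser):
    def _parse_my_func(self):
        # builds the node and runs validate_expression in one step
        return self.expression(exp.Anonymous, this="MY_FUNC", expressions=[exp.Literal.number(1)])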
1372 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1373 """ 1374 Validates an Expression, making sure that all its mandatory arguments are set. 1375 1376 Args: 1377 expression: The expression to validate. 1378 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1379 1380 Returns: 1381 The validated expression. 1382 """ 1383 if self.error_level != ErrorLevel.IGNORE: 1384 for error_message in expression.error_messages(args): 1385 self.raise_error(error_message) 1386 1387 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
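Validation is what turns a missing mandatory argument into a parse error; under ErrorLevel.IGNORE it is skipped entirely (a sketch; exp.Add requires both operands):

from sqlglot import exp
from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

incomplete = exp.Add(this=exp.Literal.number(1))  # the mandatory 'expression' arg is missing

Parser(error_level=ErrorLevel.IGNORE).validate_expression(incomplete)  # returns the node untouched
# Parser().validate_expression(incomplete)  # would raise ParseError at the default IMMEDIATE level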