sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)
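
# A rough usage sketch for the builders above (assuming sqlglot is installed); the
# parser routes function arguments through them when it sees the matching name:
#
#     import sqlglot
#     sqlglot.parse_one("SELECT MOD(a + 1, 7)").sql()  # 'SELECT (a + 1) % 7', via build_mod
#     sqlglot.parse_one("SELECT LOG(10, x)")           # handled by build_logarithm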


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "MOD": build_mod,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": build_var_map,
        "LOWER": build_lower,
        "UPPER": build_upper,
        "HEX": build_hex,
        "TO_HEX": build_hex,
    }
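
    # FUNCTIONS maps uppercase function names to builders that receive the parsed argument
    # list (and the dialect, for two-argument lambdas). Dialects extend it in their Parser
    # subclass; a minimal sketch with a hypothetical function name:
    #
    #     class MyParser(Parser):
    #         FUNCTIONS = {
    #             **Parser.FUNCTIONS,
    #             "REGEXP_MATCH": lambda args: exp.RegexpLike(
    #                 this=seq_get(args, 0), expression=seq_get(args, 1)
    #             ),
    #         }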

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.MODEL,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
        TokenType.STREAMLIT,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }
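
    # The type-token sets above feed _parse_types; the same names can be exercised
    # directly through exp.DataType.build, e.g. (sketch):
    #
    #     exp.DataType.build("decimal(10, 2)").sql()  # 'DECIMAL(10, 2)'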

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }
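
    # CONJUNCTION, EQUALITY and COMPARISON, together with BITWISE/TERM/FACTOR below, form
    # a precedence ladder: each _parse_* level consumes only operators from its own table
    # and delegates tighter-binding ones to the next level, so that e.g.
    # `a OR b = c + d * e` nests as Or(a, EQ(b, Add(c, Mul(d, e)))).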

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
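
    # COLUMN_OPERATORS handles the postfix operators that can follow a column; a sketch
    # for the default dialect (assuming sqlglot is installed):
    #
    #     import sqlglot
    #     sqlglot.parse_one("SELECT x::INT, doc -> '$.a' FROM t")
    #     # `::` builds exp.Cast (STRICT_CAST is True here) and `->` builds exp.JSONExtract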

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }
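
    # EXPRESSION_PARSERS above also backs parse_into, which lets callers target a specific
    # node type directly; a sketch:
    #
    #     import sqlglot
    #     sqlglot.parse_one("a AND b", into=exp.Condition)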

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        ),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
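
    # RANGE_PARSERS fires once the left operand of a predicate has been parsed; a sketch:
    #
    #     import sqlglot
    #     sqlglot.parse_one("x BETWEEN 1 AND 2", into=exp.Condition)  # exp.Between
    #     sqlglot.parse_one("x IN (1, 2)", into=exp.Condition)        # exp.In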

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_conjunction),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
"PERIOD": lambda self: self._parse_period_for_system_time(), 936 "PRIMARY KEY": lambda self: self._parse_primary_key(), 937 "REFERENCES": lambda self: self._parse_references(match=False), 938 "TITLE": lambda self: self.expression( 939 exp.TitleColumnConstraint, this=self._parse_var_or_string() 940 ), 941 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 942 "UNIQUE": lambda self: self._parse_unique(), 943 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 944 "WITH": lambda self: self.expression( 945 exp.Properties, expressions=self._parse_wrapped_properties() 946 ), 947 } 948 949 ALTER_PARSERS = { 950 "ADD": lambda self: self._parse_alter_table_add(), 951 "ALTER": lambda self: self._parse_alter_table_alter(), 952 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 953 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 954 "DROP": lambda self: self._parse_alter_table_drop(), 955 "RENAME": lambda self: self._parse_alter_table_rename(), 956 "SET": lambda self: self._parse_alter_table_set(), 957 } 958 959 ALTER_ALTER_PARSERS = { 960 "DISTKEY": lambda self: self._parse_alter_diststyle(), 961 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 962 "SORTKEY": lambda self: self._parse_alter_sortkey(), 963 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 964 } 965 966 SCHEMA_UNNAMED_CONSTRAINTS = { 967 "CHECK", 968 "EXCLUDE", 969 "FOREIGN KEY", 970 "LIKE", 971 "PERIOD", 972 "PRIMARY KEY", 973 "UNIQUE", 974 } 975 976 NO_PAREN_FUNCTION_PARSERS = { 977 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 978 "CASE": lambda self: self._parse_case(), 979 "IF": lambda self: self._parse_if(), 980 "NEXT": lambda self: self._parse_next_value_for(), 981 } 982 983 INVALID_FUNC_NAME_TOKENS = { 984 TokenType.IDENTIFIER, 985 TokenType.STRING, 986 } 987 988 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 989 990 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 991 992 FUNCTION_PARSERS = { 993 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 994 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 995 "DECODE": lambda self: self._parse_decode(), 996 "EXTRACT": lambda self: self._parse_extract(), 997 "JSON_OBJECT": lambda self: self._parse_json_object(), 998 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 999 "JSON_TABLE": lambda self: self._parse_json_table(), 1000 "MATCH": lambda self: self._parse_match_against(), 1001 "OPENJSON": lambda self: self._parse_open_json(), 1002 "POSITION": lambda self: self._parse_position(), 1003 "PREDICT": lambda self: self._parse_predict(), 1004 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1005 "STRING_AGG": lambda self: self._parse_string_agg(), 1006 "SUBSTRING": lambda self: self._parse_substring(), 1007 "TRIM": lambda self: self._parse_trim(), 1008 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1009 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1010 } 1011 1012 QUERY_MODIFIER_PARSERS = { 1013 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1014 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1015 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1016 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1017 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1018 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1019 

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTER: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple())

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}
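
    # QUERY_MODIFIER_PARSERS above maps each clause keyword to an (arg_name, parser) pair
    # consumed by _parse_query_modifiers, so a query such as
    # `SELECT * FROM t WHERE x > 0 QUALIFY ROW_NUMBER() OVER (ORDER BY x) = 1`
    # accumulates its "where" and "qualify" args on the same Select node.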

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_UNION = True
    UNION_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a JSON document
    COLON_IS_JSON_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True
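
    # The boolean knobs above and below are meant to be flipped by dialect-specific
    # Parser subclasses; a minimal sketch with a hypothetical dialect:
    #
    #     class MyParser(Parser):
    #         STRING_ALIASES = True      # accept SELECT COUNT(*) 'count'
    #         LOG_DEFAULTS_TO_LN = True  # single-argument LOG() parses as LN()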

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
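
    # A sketch of driving parse() manually; sqlglot.parse/parse_one wrap this flow:
    #
    #     from sqlglot.dialects import Dialect
    #
    #     dialect = Dialect.get_or_raise("duckdb")
    #     sql = "SELECT 1; SELECT 2"
    #     expressions = dialect.parser().parse(dialect.tokenize(sql), sql)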
1268 """ 1269 errors = [] 1270 for expression_type in ensure_list(expression_types): 1271 parser = self.EXPRESSION_PARSERS.get(expression_type) 1272 if not parser: 1273 raise TypeError(f"No parser registered for {expression_type}") 1274 1275 try: 1276 return self._parse(parser, raw_tokens, sql) 1277 except ParseError as e: 1278 e.errors[0]["into_expression"] = expression_type 1279 errors.append(e) 1280 1281 raise ParseError( 1282 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1283 errors=merge_errors(errors), 1284 ) from errors[-1] 1285 1286 def _parse( 1287 self, 1288 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1289 raw_tokens: t.List[Token], 1290 sql: t.Optional[str] = None, 1291 ) -> t.List[t.Optional[exp.Expression]]: 1292 self.reset() 1293 self.sql = sql or "" 1294 1295 total = len(raw_tokens) 1296 chunks: t.List[t.List[Token]] = [[]] 1297 1298 for i, token in enumerate(raw_tokens): 1299 if token.token_type == TokenType.SEMICOLON: 1300 if token.comments: 1301 chunks.append([token]) 1302 1303 if i < total - 1: 1304 chunks.append([]) 1305 else: 1306 chunks[-1].append(token) 1307 1308 expressions = [] 1309 1310 for tokens in chunks: 1311 self._index = -1 1312 self._tokens = tokens 1313 self._advance() 1314 1315 expressions.append(parse_method(self)) 1316 1317 if self._index < len(self._tokens): 1318 self.raise_error("Invalid expression / Unexpected token") 1319 1320 self.check_errors() 1321 1322 return expressions 1323 1324 def check_errors(self) -> None: 1325 """Logs or raises any found errors, depending on the chosen error level setting.""" 1326 if self.error_level == ErrorLevel.WARN: 1327 for error in self.errors: 1328 logger.error(str(error)) 1329 elif self.error_level == ErrorLevel.RAISE and self.errors: 1330 raise ParseError( 1331 concat_messages(self.errors, self.max_errors), 1332 errors=merge_errors(self.errors), 1333 ) 1334 1335 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1336 """ 1337 Appends an error in the list of recorded errors or raises it, depending on the chosen 1338 error level setting. 1339 """ 1340 token = token or self._curr or self._prev or Token.string("") 1341 start = token.start 1342 end = token.end + 1 1343 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1344 highlight = self.sql[start:end] 1345 end_context = self.sql[end : end + self.error_message_context] 1346 1347 error = ParseError.new( 1348 f"{message}. Line {token.line}, Col: {token.col}.\n" 1349 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1350 description=message, 1351 line=token.line, 1352 col=token.col, 1353 start_context=start_context, 1354 highlight=highlight, 1355 end_context=end_context, 1356 ) 1357 1358 if self.error_level == ErrorLevel.IMMEDIATE: 1359 raise error 1360 1361 self.errors.append(error) 1362 1363 def expression( 1364 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1365 ) -> E: 1366 """ 1367 Creates a new, validated Expression. 1368 1369 Args: 1370 exp_class: The expression class to instantiate. 1371 comments: An optional list of comments to attach to the expression. 1372 kwargs: The arguments to set for the expression along with their respective values. 1373 1374 Returns: 1375 The target expression. 
1376 """ 1377 instance = exp_class(**kwargs) 1378 instance.add_comments(comments) if comments else self._add_comments(instance) 1379 return self.validate_expression(instance) 1380 1381 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1382 if expression and self._prev_comments: 1383 expression.add_comments(self._prev_comments) 1384 self._prev_comments = None 1385 1386 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1387 """ 1388 Validates an Expression, making sure that all its mandatory arguments are set. 1389 1390 Args: 1391 expression: The expression to validate. 1392 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1393 1394 Returns: 1395 The validated expression. 1396 """ 1397 if self.error_level != ErrorLevel.IGNORE: 1398 for error_message in expression.error_messages(args): 1399 self.raise_error(error_message) 1400 1401 return expression 1402 1403 def _find_sql(self, start: Token, end: Token) -> str: 1404 return self.sql[start.start : end.end + 1] 1405 1406 def _is_connected(self) -> bool: 1407 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1408 1409 def _advance(self, times: int = 1) -> None: 1410 self._index += times 1411 self._curr = seq_get(self._tokens, self._index) 1412 self._next = seq_get(self._tokens, self._index + 1) 1413 1414 if self._index > 0: 1415 self._prev = self._tokens[self._index - 1] 1416 self._prev_comments = self._prev.comments 1417 else: 1418 self._prev = None 1419 self._prev_comments = None 1420 1421 def _retreat(self, index: int) -> None: 1422 if index != self._index: 1423 self._advance(index - self._index) 1424 1425 def _warn_unsupported(self) -> None: 1426 if len(self._tokens) <= 1: 1427 return 1428 1429 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1430 # interested in emitting a warning for the one being currently processed. 1431 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1432 1433 logger.warning( 1434 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1435 ) 1436 1437 def _parse_command(self) -> exp.Command: 1438 self._warn_unsupported() 1439 return self.expression( 1440 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1441 ) 1442 1443 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1444 """ 1445 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to solve
        this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
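
    # Statement dispatch in brief: given `CREATE TABLE t (x INT)`, _parse_statement
    # matches TokenType.CREATE in STATEMENT_PARSERS and hands off to _parse_create;
    # unknown leading keywords fall back to _parse_command via the dialect's COMMANDS.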

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=kind.upper(),
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
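
    # A sketch of what _parse_drop produces:
    #
    #     import sqlglot
    #     drop = sqlglot.parse_one("DROP TABLE IF EXISTS s.t CASCADE")
    #     assert drop.args["kind"] == "TABLE" and drop.args["cascade"]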
https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1658 expression = self._parse_string() 1659 extend_props(self._parse_properties()) 1660 else: 1661 expression = self._parse_statement() 1662 1663 end = self._match_text_seq("END") 1664 1665 if return_: 1666 expression = self.expression(exp.Return, this=expression) 1667 elif create_token.token_type == TokenType.INDEX: 1668 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c) 1669 if not self._match(TokenType.ON): 1670 index = self._parse_id_var() 1671 anonymous = False 1672 else: 1673 index = None 1674 anonymous = True 1675 1676 this = self._parse_index(index=index, anonymous=anonymous) 1677 elif create_token.token_type in self.DB_CREATABLES: 1678 table_parts = self._parse_table_parts( 1679 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1680 ) 1681 1682 # exp.Properties.Location.POST_NAME 1683 self._match(TokenType.COMMA) 1684 extend_props(self._parse_properties(before=True)) 1685 1686 this = self._parse_schema(this=table_parts) 1687 1688 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1689 extend_props(self._parse_properties()) 1690 1691 self._match(TokenType.ALIAS) 1692 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1693 # exp.Properties.Location.POST_ALIAS 1694 extend_props(self._parse_properties()) 1695 1696 if create_token.token_type == TokenType.SEQUENCE: 1697 expression = self._parse_types() 1698 extend_props(self._parse_properties()) 1699 else: 1700 expression = self._parse_ddl_select() 1701 1702 if create_token.token_type == TokenType.TABLE: 1703 # exp.Properties.Location.POST_EXPRESSION 1704 extend_props(self._parse_properties()) 1705 1706 indexes = [] 1707 while True: 1708 index = self._parse_index() 1709 1710 # exp.Properties.Location.POST_INDEX 1711 extend_props(self._parse_properties()) 1712 1713 if not index: 1714 break 1715 else: 1716 self._match(TokenType.COMMA) 1717 indexes.append(index) 1718 elif create_token.token_type == TokenType.VIEW: 1719 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1720 no_schema_binding = True 1721 1722 shallow = self._match_text_seq("SHALLOW") 1723 1724 if self._match_texts(self.CLONE_KEYWORDS): 1725 copy = self._prev.text.lower() == "copy" 1726 clone = self.expression( 1727 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1728 ) 1729 1730 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1731 return self._parse_as_command(start) 1732 1733 return self.expression( 1734 exp.Create, 1735 comments=comments, 1736 this=this, 1737 kind=create_token.text.upper(), 1738 replace=replace, 1739 unique=unique, 1740 expression=expression, 1741 exists=exists, 1742 properties=properties, 1743 indexes=indexes, 1744 no_schema_binding=no_schema_binding, 1745 begin=begin, 1746 end=end, 1747 clone=clone, 1748 ) 1749 1750 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1751 seq = exp.SequenceProperties() 1752 1753 options = [] 1754 index = self._index 1755 1756 while self._curr: 1757 self._match(TokenType.COMMA) 1758 if self._match_text_seq("INCREMENT"): 1759 self._match_text_seq("BY") 1760 self._match_text_seq("=") 1761 seq.set("increment", self._parse_term()) 1762 elif self._match_text_seq("MINVALUE"): 1763 seq.set("minvalue", self._parse_term()) 1764 elif self._match_text_seq("MAXVALUE"): 1765 seq.set("maxvalue", self._parse_term()) 1766 elif self._match(TokenType.START_WITH) or
self._match_text_seq("START"): 1767 self._match_text_seq("=") 1768 seq.set("start", self._parse_term()) 1769 elif self._match_text_seq("CACHE"): 1770 # T-SQL allows empty CACHE which is initialized dynamically 1771 seq.set("cache", self._parse_number() or True) 1772 elif self._match_text_seq("OWNED", "BY"): 1773 # "OWNED BY NONE" is the default 1774 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1775 else: 1776 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1777 if opt: 1778 options.append(opt) 1779 else: 1780 break 1781 1782 seq.set("options", options if options else None) 1783 return None if self._index == index else seq 1784 1785 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1786 # only used for teradata currently 1787 self._match(TokenType.COMMA) 1788 1789 kwargs = { 1790 "no": self._match_text_seq("NO"), 1791 "dual": self._match_text_seq("DUAL"), 1792 "before": self._match_text_seq("BEFORE"), 1793 "default": self._match_text_seq("DEFAULT"), 1794 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1795 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1796 "after": self._match_text_seq("AFTER"), 1797 "minimum": self._match_texts(("MIN", "MINIMUM")), 1798 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1799 } 1800 1801 if self._match_texts(self.PROPERTY_PARSERS): 1802 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1803 try: 1804 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1805 except TypeError: 1806 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1807 1808 return None 1809 1810 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1811 return self._parse_wrapped_csv(self._parse_property) 1812 1813 def _parse_property(self) -> t.Optional[exp.Expression]: 1814 if self._match_texts(self.PROPERTY_PARSERS): 1815 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1816 1817 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1818 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1819 1820 if self._match_text_seq("COMPOUND", "SORTKEY"): 1821 return self._parse_sortkey(compound=True) 1822 1823 if self._match_text_seq("SQL", "SECURITY"): 1824 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1825 1826 index = self._index 1827 key = self._parse_column() 1828 1829 if not self._match(TokenType.EQ): 1830 self._retreat(index) 1831 return self._parse_sequence_properties() 1832 1833 return self.expression( 1834 exp.Property, 1835 this=key.to_dot() if isinstance(key, exp.Column) else key, 1836 value=self._parse_bitwise() or self._parse_var(any_token=True), 1837 ) 1838 1839 def _parse_stored(self) -> exp.FileFormatProperty: 1840 self._match(TokenType.ALIAS) 1841 1842 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1843 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1844 1845 return self.expression( 1846 exp.FileFormatProperty, 1847 this=( 1848 self.expression( 1849 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1850 ) 1851 if input_format or output_format 1852 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1853 ), 1854 ) 1855 1856 def _parse_unquoted_field(self): 1857 field = self._parse_field() 1858 if isinstance(field, exp.Identifier) and not field.quoted: 1859 field = exp.var(field) 1860 1861 return field 1862 1863 def 
_parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1864 self._match(TokenType.EQ) 1865 self._match(TokenType.ALIAS) 1866 1867 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 1868 1869 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1870 properties = [] 1871 while True: 1872 if before: 1873 prop = self._parse_property_before() 1874 else: 1875 prop = self._parse_property() 1876 if not prop: 1877 break 1878 for p in ensure_list(prop): 1879 properties.append(p) 1880 1881 if properties: 1882 return self.expression(exp.Properties, expressions=properties) 1883 1884 return None 1885 1886 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1887 return self.expression( 1888 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1889 ) 1890 1891 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1892 if self._index >= 2: 1893 pre_volatile_token = self._tokens[self._index - 2] 1894 else: 1895 pre_volatile_token = None 1896 1897 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1898 return exp.VolatileProperty() 1899 1900 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1901 1902 def _parse_retention_period(self) -> exp.Var: 1903 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 1904 number = self._parse_number() 1905 number_str = f"{number} " if number else "" 1906 unit = self._parse_var(any_token=True) 1907 return exp.var(f"{number_str}{unit}") 1908 1909 def _parse_system_versioning_property( 1910 self, with_: bool = False 1911 ) -> exp.WithSystemVersioningProperty: 1912 self._match(TokenType.EQ) 1913 prop = self.expression( 1914 exp.WithSystemVersioningProperty, 1915 **{ # type: ignore 1916 "on": True, 1917 "with": with_, 1918 }, 1919 ) 1920 1921 if self._match_text_seq("OFF"): 1922 prop.set("on", False) 1923 return prop 1924 1925 self._match(TokenType.ON) 1926 if self._match(TokenType.L_PAREN): 1927 while self._curr and not self._match(TokenType.R_PAREN): 1928 if self._match_text_seq("HISTORY_TABLE", "="): 1929 prop.set("this", self._parse_table_parts()) 1930 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 1931 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 1932 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 1933 prop.set("retention_period", self._parse_retention_period()) 1934 1935 self._match(TokenType.COMMA) 1936 1937 return prop 1938 1939 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 1940 self._match(TokenType.EQ) 1941 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 1942 prop = self.expression(exp.DataDeletionProperty, on=on) 1943 1944 if self._match(TokenType.L_PAREN): 1945 while self._curr and not self._match(TokenType.R_PAREN): 1946 if self._match_text_seq("FILTER_COLUMN", "="): 1947 prop.set("filter_column", self._parse_column()) 1948 elif self._match_text_seq("RETENTION_PERIOD", "="): 1949 prop.set("retention_period", self._parse_retention_period()) 1950 1951 self._match(TokenType.COMMA) 1952 1953 return prop 1954 1955 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1956 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 1957 prop = self._parse_system_versioning_property(with_=True) 1958 self._match_r_paren() 1959 return prop 1960 1961 if self._match(TokenType.L_PAREN, advance=False): 
1962 return self._parse_wrapped_properties() 1963 1964 if self._match_text_seq("JOURNAL"): 1965 return self._parse_withjournaltable() 1966 1967 if self._match_texts(self.VIEW_ATTRIBUTES): 1968 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 1969 1970 if self._match_text_seq("DATA"): 1971 return self._parse_withdata(no=False) 1972 elif self._match_text_seq("NO", "DATA"): 1973 return self._parse_withdata(no=True) 1974 1975 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 1976 return self._parse_serde_properties(with_=True) 1977 1978 if not self._next: 1979 return None 1980 1981 return self._parse_withisolatedloading() 1982 1983 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1984 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1985 self._match(TokenType.EQ) 1986 1987 user = self._parse_id_var() 1988 self._match(TokenType.PARAMETER) 1989 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1990 1991 if not user or not host: 1992 return None 1993 1994 return exp.DefinerProperty(this=f"{user}@{host}") 1995 1996 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1997 self._match(TokenType.TABLE) 1998 self._match(TokenType.EQ) 1999 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2000 2001 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2002 return self.expression(exp.LogProperty, no=no) 2003 2004 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2005 return self.expression(exp.JournalProperty, **kwargs) 2006 2007 def _parse_checksum(self) -> exp.ChecksumProperty: 2008 self._match(TokenType.EQ) 2009 2010 on = None 2011 if self._match(TokenType.ON): 2012 on = True 2013 elif self._match_text_seq("OFF"): 2014 on = False 2015 2016 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2017 2018 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2019 return self.expression( 2020 exp.Cluster, 2021 expressions=( 2022 self._parse_wrapped_csv(self._parse_ordered) 2023 if wrapped 2024 else self._parse_csv(self._parse_ordered) 2025 ), 2026 ) 2027 2028 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2029 self._match_text_seq("BY") 2030 2031 self._match_l_paren() 2032 expressions = self._parse_csv(self._parse_column) 2033 self._match_r_paren() 2034 2035 if self._match_text_seq("SORTED", "BY"): 2036 self._match_l_paren() 2037 sorted_by = self._parse_csv(self._parse_ordered) 2038 self._match_r_paren() 2039 else: 2040 sorted_by = None 2041 2042 self._match(TokenType.INTO) 2043 buckets = self._parse_number() 2044 self._match_text_seq("BUCKETS") 2045 2046 return self.expression( 2047 exp.ClusteredByProperty, 2048 expressions=expressions, 2049 sorted_by=sorted_by, 2050 buckets=buckets, 2051 ) 2052 2053 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2054 if not self._match_text_seq("GRANTS"): 2055 self._retreat(self._index - 1) 2056 return None 2057 2058 return self.expression(exp.CopyGrantsProperty) 2059 2060 def _parse_freespace(self) -> exp.FreespaceProperty: 2061 self._match(TokenType.EQ) 2062 return self.expression( 2063 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2064 ) 2065 2066 def _parse_mergeblockratio( 2067 self, no: bool = False, default: bool = False 2068 ) -> exp.MergeBlockRatioProperty: 2069 if self._match(TokenType.EQ): 2070 return self.expression( 2071 exp.MergeBlockRatioProperty, 2072 this=self._parse_number(), 
2073 percent=self._match(TokenType.PERCENT), 2074 ) 2075 2076 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2077 2078 def _parse_datablocksize( 2079 self, 2080 default: t.Optional[bool] = None, 2081 minimum: t.Optional[bool] = None, 2082 maximum: t.Optional[bool] = None, 2083 ) -> exp.DataBlocksizeProperty: 2084 self._match(TokenType.EQ) 2085 size = self._parse_number() 2086 2087 units = None 2088 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2089 units = self._prev.text 2090 2091 return self.expression( 2092 exp.DataBlocksizeProperty, 2093 size=size, 2094 units=units, 2095 default=default, 2096 minimum=minimum, 2097 maximum=maximum, 2098 ) 2099 2100 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2101 self._match(TokenType.EQ) 2102 always = self._match_text_seq("ALWAYS") 2103 manual = self._match_text_seq("MANUAL") 2104 never = self._match_text_seq("NEVER") 2105 default = self._match_text_seq("DEFAULT") 2106 2107 autotemp = None 2108 if self._match_text_seq("AUTOTEMP"): 2109 autotemp = self._parse_schema() 2110 2111 return self.expression( 2112 exp.BlockCompressionProperty, 2113 always=always, 2114 manual=manual, 2115 never=never, 2116 default=default, 2117 autotemp=autotemp, 2118 ) 2119 2120 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2121 index = self._index 2122 no = self._match_text_seq("NO") 2123 concurrent = self._match_text_seq("CONCURRENT") 2124 2125 if not self._match_text_seq("ISOLATED", "LOADING"): 2126 self._retreat(index) 2127 return None 2128 2129 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2130 return self.expression( 2131 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2132 ) 2133 2134 def _parse_locking(self) -> exp.LockingProperty: 2135 if self._match(TokenType.TABLE): 2136 kind = "TABLE" 2137 elif self._match(TokenType.VIEW): 2138 kind = "VIEW" 2139 elif self._match(TokenType.ROW): 2140 kind = "ROW" 2141 elif self._match_text_seq("DATABASE"): 2142 kind = "DATABASE" 2143 else: 2144 kind = None 2145 2146 if kind in ("DATABASE", "TABLE", "VIEW"): 2147 this = self._parse_table_parts() 2148 else: 2149 this = None 2150 2151 if self._match(TokenType.FOR): 2152 for_or_in = "FOR" 2153 elif self._match(TokenType.IN): 2154 for_or_in = "IN" 2155 else: 2156 for_or_in = None 2157 2158 if self._match_text_seq("ACCESS"): 2159 lock_type = "ACCESS" 2160 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2161 lock_type = "EXCLUSIVE" 2162 elif self._match_text_seq("SHARE"): 2163 lock_type = "SHARE" 2164 elif self._match_text_seq("READ"): 2165 lock_type = "READ" 2166 elif self._match_text_seq("WRITE"): 2167 lock_type = "WRITE" 2168 elif self._match_text_seq("CHECKSUM"): 2169 lock_type = "CHECKSUM" 2170 else: 2171 lock_type = None 2172 2173 override = self._match_text_seq("OVERRIDE") 2174 2175 return self.expression( 2176 exp.LockingProperty, 2177 this=this, 2178 kind=kind, 2179 for_or_in=for_or_in, 2180 lock_type=lock_type, 2181 override=override, 2182 ) 2183 2184 def _parse_partition_by(self) -> t.List[exp.Expression]: 2185 if self._match(TokenType.PARTITION_BY): 2186 return self._parse_csv(self._parse_conjunction) 2187 return [] 2188 2189 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2190 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2191 if self._match_text_seq("MINVALUE"): 2192 return exp.var("MINVALUE") 2193 if self._match_text_seq("MAXVALUE"): 2194 return exp.var("MAXVALUE") 2195 
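# Illustrative note (editorial, not in the original source): for Postgres range bounds
# such as FOR VALUES FROM (1, MINVALUE) TO (10, MAXVALUE), only the MINVALUE/MAXVALUE
# keywords are special-cased above; any other bound falls through and is parsed as an
# ordinary expression here.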
return self._parse_bitwise() 2196 2197 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2198 expression = None 2199 from_expressions = None 2200 to_expressions = None 2201 2202 if self._match(TokenType.IN): 2203 this = self._parse_wrapped_csv(self._parse_bitwise) 2204 elif self._match(TokenType.FROM): 2205 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2206 self._match_text_seq("TO") 2207 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2208 elif self._match_text_seq("WITH", "(", "MODULUS"): 2209 this = self._parse_number() 2210 self._match_text_seq(",", "REMAINDER") 2211 expression = self._parse_number() 2212 self._match_r_paren() 2213 else: 2214 self.raise_error("Failed to parse partition bound spec.") 2215 2216 return self.expression( 2217 exp.PartitionBoundSpec, 2218 this=this, 2219 expression=expression, 2220 from_expressions=from_expressions, 2221 to_expressions=to_expressions, 2222 ) 2223 2224 # https://www.postgresql.org/docs/current/sql-createtable.html 2225 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2226 if not self._match_text_seq("OF"): 2227 self._retreat(self._index - 1) 2228 return None 2229 2230 this = self._parse_table(schema=True) 2231 2232 if self._match(TokenType.DEFAULT): 2233 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2234 elif self._match_text_seq("FOR", "VALUES"): 2235 expression = self._parse_partition_bound_spec() 2236 else: 2237 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2238 2239 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2240 2241 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2242 self._match(TokenType.EQ) 2243 return self.expression( 2244 exp.PartitionedByProperty, 2245 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2246 ) 2247 2248 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2249 if self._match_text_seq("AND", "STATISTICS"): 2250 statistics = True 2251 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2252 statistics = False 2253 else: 2254 statistics = None 2255 2256 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2257 2258 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2259 if self._match_text_seq("SQL"): 2260 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2261 return None 2262 2263 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2264 if self._match_text_seq("SQL", "DATA"): 2265 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2266 return None 2267 2268 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2269 if self._match_text_seq("PRIMARY", "INDEX"): 2270 return exp.NoPrimaryIndexProperty() 2271 if self._match_text_seq("SQL"): 2272 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2273 return None 2274 2275 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2276 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2277 return exp.OnCommitProperty() 2278 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2279 return exp.OnCommitProperty(delete=True) 2280 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2281 2282 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2283 if self._match_text_seq("SQL", "DATA"): 2284 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2285 
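# Not followed by SQL DATA, so this is not a READS SQL DATA property.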
return None 2286 2287 def _parse_distkey(self) -> exp.DistKeyProperty: 2288 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2289 2290 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2291 table = self._parse_table(schema=True) 2292 2293 options = [] 2294 while self._match_texts(("INCLUDING", "EXCLUDING")): 2295 this = self._prev.text.upper() 2296 2297 id_var = self._parse_id_var() 2298 if not id_var: 2299 return None 2300 2301 options.append( 2302 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2303 ) 2304 2305 return self.expression(exp.LikeProperty, this=table, expressions=options) 2306 2307 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2308 return self.expression( 2309 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2310 ) 2311 2312 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2313 self._match(TokenType.EQ) 2314 return self.expression( 2315 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2316 ) 2317 2318 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2319 self._match_text_seq("WITH", "CONNECTION") 2320 return self.expression( 2321 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2322 ) 2323 2324 def _parse_returns(self) -> exp.ReturnsProperty: 2325 value: t.Optional[exp.Expression] 2326 null = None 2327 is_table = self._match(TokenType.TABLE) 2328 2329 if is_table: 2330 if self._match(TokenType.LT): 2331 value = self.expression( 2332 exp.Schema, 2333 this="TABLE", 2334 expressions=self._parse_csv(self._parse_struct_types), 2335 ) 2336 if not self._match(TokenType.GT): 2337 self.raise_error("Expecting >") 2338 else: 2339 value = self._parse_schema(exp.var("TABLE")) 2340 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2341 null = True 2342 value = None 2343 else: 2344 value = self._parse_types() 2345 2346 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2347 2348 def _parse_describe(self) -> exp.Describe: 2349 kind = self._match_set(self.CREATABLES) and self._prev.text 2350 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2351 if self._match(TokenType.DOT): 2352 style = None 2353 self._retreat(self._index - 2) 2354 this = self._parse_table(schema=True) 2355 properties = self._parse_properties() 2356 expressions = properties.expressions if properties else None 2357 return self.expression( 2358 exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2359 ) 2360 2361 def _parse_insert(self) -> exp.Insert: 2362 comments = ensure_list(self._prev_comments) 2363 hint = self._parse_hint() 2364 overwrite = self._match(TokenType.OVERWRITE) 2365 ignore = self._match(TokenType.IGNORE) 2366 local = self._match_text_seq("LOCAL") 2367 alternative = None 2368 is_function = None 2369 2370 if self._match_text_seq("DIRECTORY"): 2371 this: t.Optional[exp.Expression] = self.expression( 2372 exp.Directory, 2373 this=self._parse_var_or_string(), 2374 local=local, 2375 row_format=self._parse_row_format(match_row=True), 2376 ) 2377 else: 2378 if self._match(TokenType.OR): 2379 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2380 2381 self._match(TokenType.INTO) 2382 comments += ensure_list(self._prev_comments) 2383 self._match(TokenType.TABLE) 2384 is_function = self._match(TokenType.FUNCTION) 2385 2386 this = ( 2387 
self._parse_table(schema=True, parse_partition=True) 2388 if not is_function 2389 else self._parse_function() 2390 ) 2391 2392 returning = self._parse_returning() 2393 2394 return self.expression( 2395 exp.Insert, 2396 comments=comments, 2397 hint=hint, 2398 is_function=is_function, 2399 this=this, 2400 stored=self._match_text_seq("STORED") and self._parse_stored(), 2401 by_name=self._match_text_seq("BY", "NAME"), 2402 exists=self._parse_exists(), 2403 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 2404 and self._parse_conjunction(), 2405 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2406 conflict=self._parse_on_conflict(), 2407 returning=returning or self._parse_returning(), 2408 overwrite=overwrite, 2409 alternative=alternative, 2410 ignore=ignore, 2411 ) 2412 2413 def _parse_kill(self) -> exp.Kill: 2414 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2415 2416 return self.expression( 2417 exp.Kill, 2418 this=self._parse_primary(), 2419 kind=kind, 2420 ) 2421 2422 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2423 conflict = self._match_text_seq("ON", "CONFLICT") 2424 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2425 2426 if not conflict and not duplicate: 2427 return None 2428 2429 conflict_keys = None 2430 constraint = None 2431 2432 if conflict: 2433 if self._match_text_seq("ON", "CONSTRAINT"): 2434 constraint = self._parse_id_var() 2435 elif self._match(TokenType.L_PAREN): 2436 conflict_keys = self._parse_csv(self._parse_id_var) 2437 self._match_r_paren() 2438 2439 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2440 if self._prev.token_type == TokenType.UPDATE: 2441 self._match(TokenType.SET) 2442 expressions = self._parse_csv(self._parse_equality) 2443 else: 2444 expressions = None 2445 2446 return self.expression( 2447 exp.OnConflict, 2448 duplicate=duplicate, 2449 expressions=expressions, 2450 action=action, 2451 conflict_keys=conflict_keys, 2452 constraint=constraint, 2453 ) 2454 2455 def _parse_returning(self) -> t.Optional[exp.Returning]: 2456 if not self._match(TokenType.RETURNING): 2457 return None 2458 return self.expression( 2459 exp.Returning, 2460 expressions=self._parse_csv(self._parse_expression), 2461 into=self._match(TokenType.INTO) and self._parse_table_part(), 2462 ) 2463 2464 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2465 if not self._match(TokenType.FORMAT): 2466 return None 2467 return self._parse_row_format() 2468 2469 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2470 index = self._index 2471 with_ = with_ or self._match_text_seq("WITH") 2472 2473 if not self._match(TokenType.SERDE_PROPERTIES): 2474 self._retreat(index) 2475 return None 2476 return self.expression( 2477 exp.SerdeProperties, 2478 **{ # type: ignore 2479 "expressions": self._parse_wrapped_properties(), 2480 "with": with_, 2481 }, 2482 ) 2483 2484 def _parse_row_format( 2485 self, match_row: bool = False 2486 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2487 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2488 return None 2489 2490 if self._match_text_seq("SERDE"): 2491 this = self._parse_string() 2492 2493 serde_properties = self._parse_serde_properties() 2494 2495 return self.expression( 2496 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2497 ) 2498 2499 self._match_text_seq("DELIMITED") 2500 
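# Illustrative note (editorial, not in the original source): this handles Hive-style
# clauses of roughly this shape:
#     ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' ESCAPED BY '\\'
#     COLLECTION ITEMS TERMINATED BY '|' MAP KEYS TERMINATED BY ':'
#     LINES TERMINATED BY '\n' NULL DEFINED AS '\\N'
# Each fragment matched below contributes one kwarg to RowFormatDelimitedProperty.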
2501 kwargs = {} 2502 2503 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2504 kwargs["fields"] = self._parse_string() 2505 if self._match_text_seq("ESCAPED", "BY"): 2506 kwargs["escaped"] = self._parse_string() 2507 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2508 kwargs["collection_items"] = self._parse_string() 2509 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2510 kwargs["map_keys"] = self._parse_string() 2511 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2512 kwargs["lines"] = self._parse_string() 2513 if self._match_text_seq("NULL", "DEFINED", "AS"): 2514 kwargs["null"] = self._parse_string() 2515 2516 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2517 2518 def _parse_load(self) -> exp.LoadData | exp.Command: 2519 if self._match_text_seq("DATA"): 2520 local = self._match_text_seq("LOCAL") 2521 self._match_text_seq("INPATH") 2522 inpath = self._parse_string() 2523 overwrite = self._match(TokenType.OVERWRITE) 2524 self._match_pair(TokenType.INTO, TokenType.TABLE) 2525 2526 return self.expression( 2527 exp.LoadData, 2528 this=self._parse_table(schema=True), 2529 local=local, 2530 overwrite=overwrite, 2531 inpath=inpath, 2532 partition=self._parse_partition(), 2533 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2534 serde=self._match_text_seq("SERDE") and self._parse_string(), 2535 ) 2536 return self._parse_as_command(self._prev) 2537 2538 def _parse_delete(self) -> exp.Delete: 2539 # This handles MySQL's "Multiple-Table Syntax" 2540 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2541 tables = None 2542 comments = self._prev_comments 2543 if not self._match(TokenType.FROM, advance=False): 2544 tables = self._parse_csv(self._parse_table) or None 2545 2546 returning = self._parse_returning() 2547 2548 return self.expression( 2549 exp.Delete, 2550 comments=comments, 2551 tables=tables, 2552 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2553 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2554 where=self._parse_where(), 2555 returning=returning or self._parse_returning(), 2556 limit=self._parse_limit(), 2557 ) 2558 2559 def _parse_update(self) -> exp.Update: 2560 comments = self._prev_comments 2561 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2562 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2563 returning = self._parse_returning() 2564 return self.expression( 2565 exp.Update, 2566 comments=comments, 2567 **{ # type: ignore 2568 "this": this, 2569 "expressions": expressions, 2570 "from": self._parse_from(joins=True), 2571 "where": self._parse_where(), 2572 "returning": returning or self._parse_returning(), 2573 "order": self._parse_order(), 2574 "limit": self._parse_limit(), 2575 }, 2576 ) 2577 2578 def _parse_uncache(self) -> exp.Uncache: 2579 if not self._match(TokenType.TABLE): 2580 self.raise_error("Expecting TABLE after UNCACHE") 2581 2582 return self.expression( 2583 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2584 ) 2585 2586 def _parse_cache(self) -> exp.Cache: 2587 lazy = self._match_text_seq("LAZY") 2588 self._match(TokenType.TABLE) 2589 table = self._parse_table(schema=True) 2590 2591 options = [] 2592 if self._match_text_seq("OPTIONS"): 2593 self._match_l_paren() 2594 k = self._parse_string() 2595 self._match(TokenType.EQ) 2596 v = self._parse_string() 2597 options = [k, v] 2598 self._match_r_paren() 2599 2600 
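# Illustrative note (editorial, not in the original source): this matches Spark-style
# statements of roughly the form CACHE [LAZY] TABLE t [OPTIONS ('k' = 'v')] [[AS] SELECT ...];
# the optional AS is consumed next and the trailing SELECT becomes "expression".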
self._match(TokenType.ALIAS) 2601 return self.expression( 2602 exp.Cache, 2603 this=table, 2604 lazy=lazy, 2605 options=options, 2606 expression=self._parse_select(nested=True), 2607 ) 2608 2609 def _parse_partition(self) -> t.Optional[exp.Partition]: 2610 if not self._match(TokenType.PARTITION): 2611 return None 2612 2613 return self.expression( 2614 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2615 ) 2616 2617 def _parse_value(self) -> t.Optional[exp.Tuple]: 2618 if self._match(TokenType.L_PAREN): 2619 expressions = self._parse_csv(self._parse_expression) 2620 self._match_r_paren() 2621 return self.expression(exp.Tuple, expressions=expressions) 2622 2623 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2624 expression = self._parse_expression() 2625 if expression: 2626 return self.expression(exp.Tuple, expressions=[expression]) 2627 return None 2628 2629 def _parse_projections(self) -> t.List[exp.Expression]: 2630 return self._parse_expressions() 2631 2632 def _parse_select( 2633 self, 2634 nested: bool = False, 2635 table: bool = False, 2636 parse_subquery_alias: bool = True, 2637 parse_set_operation: bool = True, 2638 ) -> t.Optional[exp.Expression]: 2639 cte = self._parse_with() 2640 2641 if cte: 2642 this = self._parse_statement() 2643 2644 if not this: 2645 self.raise_error("Failed to parse any statement following CTE") 2646 return cte 2647 2648 if "with" in this.arg_types: 2649 this.set("with", cte) 2650 else: 2651 self.raise_error(f"{this.key} does not support CTE") 2652 this = cte 2653 2654 return this 2655 2656 # duckdb supports leading with FROM x 2657 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2658 2659 if self._match(TokenType.SELECT): 2660 comments = self._prev_comments 2661 2662 hint = self._parse_hint() 2663 all_ = self._match(TokenType.ALL) 2664 distinct = self._match_set(self.DISTINCT_TOKENS) 2665 2666 kind = ( 2667 self._match(TokenType.ALIAS) 2668 and self._match_texts(("STRUCT", "VALUE")) 2669 and self._prev.text.upper() 2670 ) 2671 2672 if distinct: 2673 distinct = self.expression( 2674 exp.Distinct, 2675 on=self._parse_value() if self._match(TokenType.ON) else None, 2676 ) 2677 2678 if all_ and distinct: 2679 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2680 2681 limit = self._parse_limit(top=True) 2682 projections = self._parse_projections() 2683 2684 this = self.expression( 2685 exp.Select, 2686 kind=kind, 2687 hint=hint, 2688 distinct=distinct, 2689 expressions=projections, 2690 limit=limit, 2691 ) 2692 this.comments = comments 2693 2694 into = self._parse_into() 2695 if into: 2696 this.set("into", into) 2697 2698 if not from_: 2699 from_ = self._parse_from() 2700 2701 if from_: 2702 this.set("from", from_) 2703 2704 this = self._parse_query_modifiers(this) 2705 elif (table or nested) and self._match(TokenType.L_PAREN): 2706 if self._match(TokenType.PIVOT): 2707 this = self._parse_simplified_pivot() 2708 elif self._match(TokenType.FROM): 2709 this = exp.select("*").from_( 2710 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2711 ) 2712 else: 2713 this = ( 2714 self._parse_table() 2715 if table 2716 else self._parse_select(nested=True, parse_set_operation=False) 2717 ) 2718 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2719 2720 self._match_r_paren() 2721 2722 # We return early here so that the UNION isn't attached to the subquery by the 2723 # following call to _parse_set_operations, but instead becomes the 
parent node 2724 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2725 elif self._match(TokenType.VALUES, advance=False): 2726 this = self._parse_derived_table_values() 2727 elif from_: 2728 this = exp.select("*").from_(from_.this, copy=False) 2729 else: 2730 this = None 2731 2732 if parse_set_operation: 2733 return self._parse_set_operations(this) 2734 return this 2735 2736 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2737 if not skip_with_token and not self._match(TokenType.WITH): 2738 return None 2739 2740 comments = self._prev_comments 2741 recursive = self._match(TokenType.RECURSIVE) 2742 2743 expressions = [] 2744 while True: 2745 expressions.append(self._parse_cte()) 2746 2747 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2748 break 2749 else: 2750 self._match(TokenType.WITH) 2751 2752 return self.expression( 2753 exp.With, comments=comments, expressions=expressions, recursive=recursive 2754 ) 2755 2756 def _parse_cte(self) -> exp.CTE: 2757 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2758 if not alias or not alias.this: 2759 self.raise_error("Expected CTE to have alias") 2760 2761 self._match(TokenType.ALIAS) 2762 2763 if self._match_text_seq("NOT", "MATERIALIZED"): 2764 materialized = False 2765 elif self._match_text_seq("MATERIALIZED"): 2766 materialized = True 2767 else: 2768 materialized = None 2769 2770 return self.expression( 2771 exp.CTE, 2772 this=self._parse_wrapped(self._parse_statement), 2773 alias=alias, 2774 materialized=materialized, 2775 ) 2776 2777 def _parse_table_alias( 2778 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2779 ) -> t.Optional[exp.TableAlias]: 2780 any_token = self._match(TokenType.ALIAS) 2781 alias = ( 2782 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2783 or self._parse_string_as_identifier() 2784 ) 2785 2786 index = self._index 2787 if self._match(TokenType.L_PAREN): 2788 columns = self._parse_csv(self._parse_function_parameter) 2789 self._match_r_paren() if columns else self._retreat(index) 2790 else: 2791 columns = None 2792 2793 if not alias and not columns: 2794 return None 2795 2796 return self.expression(exp.TableAlias, this=alias, columns=columns) 2797 2798 def _parse_subquery( 2799 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2800 ) -> t.Optional[exp.Subquery]: 2801 if not this: 2802 return None 2803 2804 return self.expression( 2805 exp.Subquery, 2806 this=this, 2807 pivots=self._parse_pivots(), 2808 alias=self._parse_table_alias() if parse_alias else None, 2809 ) 2810 2811 def _implicit_unnests_to_explicit(self, this: E) -> E: 2812 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 2813 2814 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2815 for i, join in enumerate(this.args.get("joins") or []): 2816 table = join.this 2817 normalized_table = table.copy() 2818 normalized_table.meta["maybe_column"] = True 2819 normalized_table = _norm(normalized_table, dialect=self.dialect) 2820 2821 if isinstance(table, exp.Table) and not join.args.get("on"): 2822 if normalized_table.parts[0].name in refs: 2823 table_as_column = table.to_column() 2824 unnest = exp.Unnest(expressions=[table_as_column]) 2825 2826 # Table.to_column creates a parent Alias node that we want to convert to 2827 # a TableAlias and attach to the Unnest, so it matches the parser's output 2828 if isinstance(table.args.get("alias"), exp.TableAlias): 
2829 table_as_column.replace(table_as_column.this) 2830 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2831 2832 table.replace(unnest) 2833 2834 refs.add(normalized_table.alias_or_name) 2835 2836 return this 2837 2838 def _parse_query_modifiers( 2839 self, this: t.Optional[exp.Expression] 2840 ) -> t.Optional[exp.Expression]: 2841 if isinstance(this, (exp.Query, exp.Table)): 2842 for join in self._parse_joins(): 2843 this.append("joins", join) 2844 for lateral in iter(self._parse_lateral, None): 2845 this.append("laterals", lateral) 2846 2847 while True: 2848 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2849 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2850 key, expression = parser(self) 2851 2852 if expression: 2853 this.set(key, expression) 2854 if key == "limit": 2855 offset = expression.args.pop("offset", None) 2856 2857 if offset: 2858 offset = exp.Offset(expression=offset) 2859 this.set("offset", offset) 2860 2861 limit_by_expressions = expression.expressions 2862 expression.set("expressions", None) 2863 offset.set("expressions", limit_by_expressions) 2864 continue 2865 break 2866 2867 if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args: 2868 this = self._implicit_unnests_to_explicit(this) 2869 2870 return this 2871 2872 def _parse_hint(self) -> t.Optional[exp.Hint]: 2873 if self._match(TokenType.HINT): 2874 hints = [] 2875 for hint in iter( 2876 lambda: self._parse_csv( 2877 lambda: self._parse_function() or self._parse_var(upper=True) 2878 ), 2879 [], 2880 ): 2881 hints.extend(hint) 2882 2883 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2884 self.raise_error("Expected */ after HINT") 2885 2886 return self.expression(exp.Hint, expressions=hints) 2887 2888 return None 2889 2890 def _parse_into(self) -> t.Optional[exp.Into]: 2891 if not self._match(TokenType.INTO): 2892 return None 2893 2894 temp = self._match(TokenType.TEMPORARY) 2895 unlogged = self._match_text_seq("UNLOGGED") 2896 self._match(TokenType.TABLE) 2897 2898 return self.expression( 2899 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2900 ) 2901 2902 def _parse_from( 2903 self, joins: bool = False, skip_from_token: bool = False 2904 ) -> t.Optional[exp.From]: 2905 if not skip_from_token and not self._match(TokenType.FROM): 2906 return None 2907 2908 return self.expression( 2909 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2910 ) 2911 2912 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 2913 return self.expression( 2914 exp.MatchRecognizeMeasure, 2915 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 2916 this=self._parse_expression(), 2917 ) 2918 2919 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2920 if not self._match(TokenType.MATCH_RECOGNIZE): 2921 return None 2922 2923 self._match_l_paren() 2924 2925 partition = self._parse_partition_by() 2926 order = self._parse_order() 2927 2928 measures = ( 2929 self._parse_csv(self._parse_match_recognize_measure) 2930 if self._match_text_seq("MEASURES") 2931 else None 2932 ) 2933 2934 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2935 rows = exp.var("ONE ROW PER MATCH") 2936 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2937 text = "ALL ROWS PER MATCH" 2938 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2939 text += " SHOW EMPTY MATCHES" 2940 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2941 text += " OMIT EMPTY MATCHES" 
2942 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2943 text += " WITH UNMATCHED ROWS" 2944 rows = exp.var(text) 2945 else: 2946 rows = None 2947 2948 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2949 text = "AFTER MATCH SKIP" 2950 if self._match_text_seq("PAST", "LAST", "ROW"): 2951 text += " PAST LAST ROW" 2952 elif self._match_text_seq("TO", "NEXT", "ROW"): 2953 text += " TO NEXT ROW" 2954 elif self._match_text_seq("TO", "FIRST"): 2955 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2956 elif self._match_text_seq("TO", "LAST"): 2957 text += f" TO LAST {self._advance_any().text}" # type: ignore 2958 after = exp.var(text) 2959 else: 2960 after = None 2961 2962 if self._match_text_seq("PATTERN"): 2963 self._match_l_paren() 2964 2965 if not self._curr: 2966 self.raise_error("Expecting )", self._curr) 2967 2968 paren = 1 2969 start = self._curr 2970 2971 while self._curr and paren > 0: 2972 if self._curr.token_type == TokenType.L_PAREN: 2973 paren += 1 2974 if self._curr.token_type == TokenType.R_PAREN: 2975 paren -= 1 2976 2977 end = self._prev 2978 self._advance() 2979 2980 if paren > 0: 2981 self.raise_error("Expecting )", self._curr) 2982 2983 pattern = exp.var(self._find_sql(start, end)) 2984 else: 2985 pattern = None 2986 2987 define = ( 2988 self._parse_csv(self._parse_name_as_expression) 2989 if self._match_text_seq("DEFINE") 2990 else None 2991 ) 2992 2993 self._match_r_paren() 2994 2995 return self.expression( 2996 exp.MatchRecognize, 2997 partition_by=partition, 2998 order=order, 2999 measures=measures, 3000 rows=rows, 3001 after=after, 3002 pattern=pattern, 3003 define=define, 3004 alias=self._parse_table_alias(), 3005 ) 3006 3007 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3008 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3009 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3010 cross_apply = False 3011 3012 if cross_apply is not None: 3013 this = self._parse_select(table=True) 3014 view = None 3015 outer = None 3016 elif self._match(TokenType.LATERAL): 3017 this = self._parse_select(table=True) 3018 view = self._match(TokenType.VIEW) 3019 outer = self._match(TokenType.OUTER) 3020 else: 3021 return None 3022 3023 if not this: 3024 this = ( 3025 self._parse_unnest() 3026 or self._parse_function() 3027 or self._parse_id_var(any_token=False) 3028 ) 3029 3030 while self._match(TokenType.DOT): 3031 this = exp.Dot( 3032 this=this, 3033 expression=self._parse_function() or self._parse_id_var(any_token=False), 3034 ) 3035 3036 if view: 3037 table = self._parse_id_var(any_token=False) 3038 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3039 table_alias: t.Optional[exp.TableAlias] = self.expression( 3040 exp.TableAlias, this=table, columns=columns 3041 ) 3042 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3043 # We move the alias from the lateral's child node to the lateral itself 3044 table_alias = this.args["alias"].pop() 3045 else: 3046 table_alias = self._parse_table_alias() 3047 3048 return self.expression( 3049 exp.Lateral, 3050 this=this, 3051 view=view, 3052 outer=outer, 3053 alias=table_alias, 3054 cross_apply=cross_apply, 3055 ) 3056 3057 def _parse_join_parts( 3058 self, 3059 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3060 return ( 3061 self._match_set(self.JOIN_METHODS) and self._prev, 3062 self._match_set(self.JOIN_SIDES) and self._prev, 3063 self._match_set(self.JOIN_KINDS) and self._prev, 3064 ) 3065 
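# Illustrative usage sketch (editorial, assuming this version's Join arg names; not
# part of the parser itself). _parse_join_parts consumes up to three optional tokens
# (method, side, kind), e.g. "NATURAL LEFT OUTER JOIN", and _parse_join below stores
# their text on the resulting exp.Join node:
#
#     import sqlglot
#
#     join = sqlglot.parse_one("SELECT * FROM a NATURAL LEFT JOIN b").args["joins"][0]
#     join.args.get("method")  # "NATURAL" (from JOIN_METHODS)
#     join.args.get("side")    # "LEFT" (from JOIN_SIDES)
#     join.args.get("kind")    # None, since no INNER/OUTER/CROSS/SEMI/ANTI token was given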
3066 def _parse_join( 3067 self, skip_join_token: bool = False, parse_bracket: bool = False 3068 ) -> t.Optional[exp.Join]: 3069 if self._match(TokenType.COMMA): 3070 return self.expression(exp.Join, this=self._parse_table()) 3071 3072 index = self._index 3073 method, side, kind = self._parse_join_parts() 3074 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3075 join = self._match(TokenType.JOIN) 3076 3077 if not skip_join_token and not join: 3078 self._retreat(index) 3079 kind = None 3080 method = None 3081 side = None 3082 3083 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3084 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3085 3086 if not skip_join_token and not join and not outer_apply and not cross_apply: 3087 return None 3088 3089 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3090 3091 if method: 3092 kwargs["method"] = method.text 3093 if side: 3094 kwargs["side"] = side.text 3095 if kind: 3096 kwargs["kind"] = kind.text 3097 if hint: 3098 kwargs["hint"] = hint 3099 3100 if self._match(TokenType.MATCH_CONDITION): 3101 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3102 3103 if self._match(TokenType.ON): 3104 kwargs["on"] = self._parse_conjunction() 3105 elif self._match(TokenType.USING): 3106 kwargs["using"] = self._parse_wrapped_id_vars() 3107 elif not isinstance(kwargs["this"], exp.Unnest) and not ( 3108 kind and kind.token_type == TokenType.CROSS 3109 ): 3110 index = self._index 3111 joins: t.Optional[list] = list(self._parse_joins()) 3112 3113 if joins and self._match(TokenType.ON): 3114 kwargs["on"] = self._parse_conjunction() 3115 elif joins and self._match(TokenType.USING): 3116 kwargs["using"] = self._parse_wrapped_id_vars() 3117 else: 3118 joins = None 3119 self._retreat(index) 3120 3121 kwargs["this"].set("joins", joins if joins else None) 3122 3123 comments = [c for token in (method, side, kind) if token for c in token.comments] 3124 return self.expression(exp.Join, comments=comments, **kwargs) 3125 3126 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3127 this = self._parse_conjunction() 3128 3129 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3130 return this 3131 3132 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3133 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3134 3135 return this 3136 3137 def _parse_index_params(self) -> exp.IndexParameters: 3138 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3139 3140 if self._match(TokenType.L_PAREN, advance=False): 3141 columns = self._parse_wrapped_csv(self._parse_with_operator) 3142 else: 3143 columns = None 3144 3145 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3146 partition_by = self._parse_partition_by() 3147 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3148 tablespace = ( 3149 self._parse_var(any_token=True) 3150 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3151 else None 3152 ) 3153 where = self._parse_where() 3154 3155 return self.expression( 3156 exp.IndexParameters, 3157 using=using, 3158 columns=columns, 3159 include=include, 3160 partition_by=partition_by, 3161 where=where, 3162 with_storage=with_storage, 3163 tablespace=tablespace, 3164 ) 3165 3166 def _parse_index( 3167 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3168 ) -> 
t.Optional[exp.Index]: 3169 if index or anonymous: 3170 unique = None 3171 primary = None 3172 amp = None 3173 3174 self._match(TokenType.ON) 3175 self._match(TokenType.TABLE) # hive 3176 table = self._parse_table_parts(schema=True) 3177 else: 3178 unique = self._match(TokenType.UNIQUE) 3179 primary = self._match_text_seq("PRIMARY") 3180 amp = self._match_text_seq("AMP") 3181 3182 if not self._match(TokenType.INDEX): 3183 return None 3184 3185 index = self._parse_id_var() 3186 table = None 3187 3188 params = self._parse_index_params() 3189 3190 return self.expression( 3191 exp.Index, 3192 this=index, 3193 table=table, 3194 unique=unique, 3195 primary=primary, 3196 amp=amp, 3197 params=params, 3198 ) 3199 3200 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3201 hints: t.List[exp.Expression] = [] 3202 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3203 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3204 hints.append( 3205 self.expression( 3206 exp.WithTableHint, 3207 expressions=self._parse_csv( 3208 lambda: self._parse_function() or self._parse_var(any_token=True) 3209 ), 3210 ) 3211 ) 3212 self._match_r_paren() 3213 else: 3214 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3215 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3216 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3217 3218 self._match_texts(("INDEX", "KEY")) 3219 if self._match(TokenType.FOR): 3220 hint.set("target", self._advance_any() and self._prev.text.upper()) 3221 3222 hint.set("expressions", self._parse_wrapped_id_vars()) 3223 hints.append(hint) 3224 3225 return hints or None 3226 3227 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3228 return ( 3229 (not schema and self._parse_function(optional_parens=False)) 3230 or self._parse_id_var(any_token=False) 3231 or self._parse_string_as_identifier() 3232 or self._parse_placeholder() 3233 ) 3234 3235 def _parse_table_parts( 3236 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3237 ) -> exp.Table: 3238 catalog = None 3239 db = None 3240 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3241 3242 while self._match(TokenType.DOT): 3243 if catalog: 3244 # This allows nesting the table in arbitrarily many dot expressions if needed 3245 table = self.expression( 3246 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3247 ) 3248 else: 3249 catalog = db 3250 db = table 3251 # "" used for tsql FROM a..b case 3252 table = self._parse_table_part(schema=schema) or "" 3253 3254 if ( 3255 wildcard 3256 and self._is_connected() 3257 and (isinstance(table, exp.Identifier) or not table) 3258 and self._match(TokenType.STAR) 3259 ): 3260 if isinstance(table, exp.Identifier): 3261 table.args["this"] += "*" 3262 else: 3263 table = exp.Identifier(this="*") 3264 3265 # We bubble up comments from the Identifier to the Table 3266 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3267 3268 if is_db_reference: 3269 catalog = db 3270 db = table 3271 table = None 3272 3273 if not table and not is_db_reference: 3274 self.raise_error(f"Expected table name but got {self._curr}") 3275 if not db and is_db_reference: 3276 self.raise_error(f"Expected database name but got {self._curr}") 3277 3278 return self.expression( 3279 exp.Table, 3280 comments=comments, 3281 this=table, 3282 db=db, 3283 catalog=catalog, 3284 pivots=self._parse_pivots(), 3285 ) 3286 3287 
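# Illustrative sketch (editorial, assuming this version's arg layout; not part of the
# parser itself) of how dotted names land on exp.Table in _parse_table_parts above.
# Each DOT shifts the accumulated parts left, so "c.d.t" resolves catalog "c", db "d"
# and table "t", while deeper chains nest inside an exp.Dot:
#
#     import sqlglot
#
#     table = sqlglot.parse_one("SELECT * FROM c.d.t").args["from"].this
#     table.name             # "t"
#     table.text("db")       # "d"
#     table.text("catalog")  # "c"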
def _parse_table( 3288 self, 3289 schema: bool = False, 3290 joins: bool = False, 3291 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3292 parse_bracket: bool = False, 3293 is_db_reference: bool = False, 3294 parse_partition: bool = False, 3295 ) -> t.Optional[exp.Expression]: 3296 lateral = self._parse_lateral() 3297 if lateral: 3298 return lateral 3299 3300 unnest = self._parse_unnest() 3301 if unnest: 3302 return unnest 3303 3304 values = self._parse_derived_table_values() 3305 if values: 3306 return values 3307 3308 subquery = self._parse_select(table=True) 3309 if subquery: 3310 if not subquery.args.get("pivots"): 3311 subquery.set("pivots", self._parse_pivots()) 3312 return subquery 3313 3314 bracket = parse_bracket and self._parse_bracket(None) 3315 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3316 3317 only = self._match(TokenType.ONLY) 3318 3319 this = t.cast( 3320 exp.Expression, 3321 bracket 3322 or self._parse_bracket( 3323 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3324 ), 3325 ) 3326 3327 if only: 3328 this.set("only", only) 3329 3330 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3331 self._match_text_seq("*") 3332 3333 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3334 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3335 this.set("partition", self._parse_partition()) 3336 3337 if schema: 3338 return self._parse_schema(this=this) 3339 3340 version = self._parse_version() 3341 3342 if version: 3343 this.set("version", version) 3344 3345 if self.dialect.ALIAS_POST_TABLESAMPLE: 3346 table_sample = self._parse_table_sample() 3347 3348 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3349 if alias: 3350 this.set("alias", alias) 3351 3352 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3353 return self.expression( 3354 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3355 ) 3356 3357 this.set("hints", self._parse_table_hints()) 3358 3359 if not this.args.get("pivots"): 3360 this.set("pivots", self._parse_pivots()) 3361 3362 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3363 table_sample = self._parse_table_sample() 3364 3365 if table_sample: 3366 table_sample.set("this", this) 3367 this = table_sample 3368 3369 if joins: 3370 for join in self._parse_joins(): 3371 this.append("joins", join) 3372 3373 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3374 this.set("ordinality", True) 3375 this.set("alias", self._parse_table_alias()) 3376 3377 return this 3378 3379 def _parse_version(self) -> t.Optional[exp.Version]: 3380 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3381 this = "TIMESTAMP" 3382 elif self._match(TokenType.VERSION_SNAPSHOT): 3383 this = "VERSION" 3384 else: 3385 return None 3386 3387 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3388 kind = self._prev.text.upper() 3389 start = self._parse_bitwise() 3390 self._match_texts(("TO", "AND")) 3391 end = self._parse_bitwise() 3392 expression: t.Optional[exp.Expression] = self.expression( 3393 exp.Tuple, expressions=[start, end] 3394 ) 3395 elif self._match_text_seq("CONTAINED", "IN"): 3396 kind = "CONTAINED IN" 3397 expression = self.expression( 3398 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3399 ) 3400 elif self._match(TokenType.ALL): 3401 kind = "ALL" 3402 expression = None 3403 else: 3404 self._match_text_seq("AS", "OF") 3405 kind = "AS OF" 3406 
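# Illustrative note (editorial, not in the original source): this covers time-travel /
# temporal queries such as T-SQL's FOR SYSTEM_TIME AS OF '2020-01-01'; the snapshot
# expression itself is parsed next.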
expression = self._parse_type() 3407 3408 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3409 3410 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3411 if not self._match(TokenType.UNNEST): 3412 return None 3413 3414 expressions = self._parse_wrapped_csv(self._parse_equality) 3415 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3416 3417 alias = self._parse_table_alias() if with_alias else None 3418 3419 if alias: 3420 if self.dialect.UNNEST_COLUMN_ONLY: 3421 if alias.args.get("columns"): 3422 self.raise_error("Unexpected extra column alias in unnest.") 3423 3424 alias.set("columns", [alias.this]) 3425 alias.set("this", None) 3426 3427 columns = alias.args.get("columns") or [] 3428 if offset and len(expressions) < len(columns): 3429 offset = columns.pop() 3430 3431 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3432 self._match(TokenType.ALIAS) 3433 offset = self._parse_id_var( 3434 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3435 ) or exp.to_identifier("offset") 3436 3437 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3438 3439 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3440 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3441 if not is_derived and not self._match_text_seq("VALUES"): 3442 return None 3443 3444 expressions = self._parse_csv(self._parse_value) 3445 alias = self._parse_table_alias() 3446 3447 if is_derived: 3448 self._match_r_paren() 3449 3450 return self.expression( 3451 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3452 ) 3453 3454 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3455 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3456 as_modifier and self._match_text_seq("USING", "SAMPLE") 3457 ): 3458 return None 3459 3460 bucket_numerator = None 3461 bucket_denominator = None 3462 bucket_field = None 3463 percent = None 3464 size = None 3465 seed = None 3466 3467 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3468 matched_l_paren = self._match(TokenType.L_PAREN) 3469 3470 if self.TABLESAMPLE_CSV: 3471 num = None 3472 expressions = self._parse_csv(self._parse_primary) 3473 else: 3474 expressions = None 3475 num = ( 3476 self._parse_factor() 3477 if self._match(TokenType.NUMBER, advance=False) 3478 else self._parse_primary() or self._parse_placeholder() 3479 ) 3480 3481 if self._match_text_seq("BUCKET"): 3482 bucket_numerator = self._parse_number() 3483 self._match_text_seq("OUT", "OF") 3484 bucket_denominator = self._parse_number() 3485 self._match(TokenType.ON) 3486 bucket_field = self._parse_field() 3487 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3488 percent = num 3489 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3490 size = num 3491 else: 3492 percent = num 3493 3494 if matched_l_paren: 3495 self._match_r_paren() 3496 3497 if self._match(TokenType.L_PAREN): 3498 method = self._parse_var(upper=True) 3499 seed = self._match(TokenType.COMMA) and self._parse_number() 3500 self._match_r_paren() 3501 elif self._match_texts(("SEED", "REPEATABLE")): 3502 seed = self._parse_wrapped(self._parse_number) 3503 3504 if not method and self.DEFAULT_SAMPLING_METHOD: 3505 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3506 3507 return self.expression( 3508 exp.TableSample, 3509 expressions=expressions, 3510 method=method, 3511
bucket_numerator=bucket_numerator, 3512 bucket_denominator=bucket_denominator, 3513 bucket_field=bucket_field, 3514 percent=percent, 3515 size=size, 3516 seed=seed, 3517 ) 3518 3519 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3520 return list(iter(self._parse_pivot, None)) or None 3521 3522 def _parse_joins(self) -> t.Iterator[exp.Join]: 3523 return iter(self._parse_join, None) 3524 3525 # https://duckdb.org/docs/sql/statements/pivot 3526 def _parse_simplified_pivot(self) -> exp.Pivot: 3527 def _parse_on() -> t.Optional[exp.Expression]: 3528 this = self._parse_bitwise() 3529 return self._parse_in(this) if self._match(TokenType.IN) else this 3530 3531 this = self._parse_table() 3532 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3533 using = self._match(TokenType.USING) and self._parse_csv( 3534 lambda: self._parse_alias(self._parse_function()) 3535 ) 3536 group = self._parse_group() 3537 return self.expression( 3538 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3539 ) 3540 3541 def _parse_pivot_in(self) -> exp.In: 3542 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3543 this = self._parse_conjunction() 3544 3545 self._match(TokenType.ALIAS) 3546 alias = self._parse_field() 3547 if alias: 3548 return self.expression(exp.PivotAlias, this=this, alias=alias) 3549 3550 return this 3551 3552 value = self._parse_column() 3553 3554 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3555 self.raise_error("Expecting IN (") 3556 3557 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3558 3559 self._match_r_paren() 3560 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3561 3562 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3563 index = self._index 3564 include_nulls = None 3565 3566 if self._match(TokenType.PIVOT): 3567 unpivot = False 3568 elif self._match(TokenType.UNPIVOT): 3569 unpivot = True 3570 3571 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3572 if self._match_text_seq("INCLUDE", "NULLS"): 3573 include_nulls = True 3574 elif self._match_text_seq("EXCLUDE", "NULLS"): 3575 include_nulls = False 3576 else: 3577 return None 3578 3579 expressions = [] 3580 3581 if not self._match(TokenType.L_PAREN): 3582 self._retreat(index) 3583 return None 3584 3585 if unpivot: 3586 expressions = self._parse_csv(self._parse_column) 3587 else: 3588 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3589 3590 if not expressions: 3591 self.raise_error("Failed to parse PIVOT's aggregation list") 3592 3593 if not self._match(TokenType.FOR): 3594 self.raise_error("Expecting FOR") 3595 3596 field = self._parse_pivot_in() 3597 3598 self._match_r_paren() 3599 3600 pivot = self.expression( 3601 exp.Pivot, 3602 expressions=expressions, 3603 field=field, 3604 unpivot=unpivot, 3605 include_nulls=include_nulls, 3606 ) 3607 3608 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3609 pivot.set("alias", self._parse_table_alias()) 3610 3611 if not unpivot: 3612 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3613 3614 columns: t.List[exp.Expression] = [] 3615 for fld in pivot.args["field"].expressions: 3616 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3617 for name in names: 3618 if self.PREFIXED_PIVOT_COLUMNS: 3619 name = f"{name}_{field_name}" if name else field_name 3620 else: 3621 name = f"{field_name}_{name}" if name else 
field_name 3622 3623 columns.append(exp.to_identifier(name)) 3624 3625 pivot.set("columns", columns) 3626 3627 return pivot 3628 3629 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3630 return [agg.alias for agg in aggregations] 3631 3632 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3633 if not skip_where_token and not self._match(TokenType.PREWHERE): 3634 return None 3635 3636 return self.expression( 3637 exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction() 3638 ) 3639 3640 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3641 if not skip_where_token and not self._match(TokenType.WHERE): 3642 return None 3643 3644 return self.expression( 3645 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 3646 ) 3647 3648 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3649 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3650 return None 3651 3652 elements: t.Dict[str, t.Any] = defaultdict(list) 3653 3654 if self._match(TokenType.ALL): 3655 elements["all"] = True 3656 elif self._match(TokenType.DISTINCT): 3657 elements["all"] = False 3658 3659 while True: 3660 expressions = self._parse_csv( 3661 lambda: None 3662 if self._match(TokenType.ROLLUP, advance=False) 3663 else self._parse_conjunction() 3664 ) 3665 if expressions: 3666 elements["expressions"].extend(expressions) 3667 3668 grouping_sets = self._parse_grouping_sets() 3669 if grouping_sets: 3670 elements["grouping_sets"].extend(grouping_sets) 3671 3672 rollup = None 3673 cube = None 3674 totals = None 3675 3676 index = self._index 3677 with_ = self._match(TokenType.WITH) 3678 if self._match(TokenType.ROLLUP): 3679 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3680 elements["rollup"].extend(ensure_list(rollup)) 3681 3682 if self._match(TokenType.CUBE): 3683 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3684 elements["cube"].extend(ensure_list(cube)) 3685 3686 if self._match_text_seq("TOTALS"): 3687 totals = True 3688 elements["totals"] = True # type: ignore 3689 3690 if not (grouping_sets or rollup or cube or totals): 3691 if with_: 3692 self._retreat(index) 3693 break 3694 3695 return self.expression(exp.Group, **elements) # type: ignore 3696 3697 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3698 if not self._match(TokenType.GROUPING_SETS): 3699 return None 3700 3701 return self._parse_wrapped_csv(self._parse_grouping_set) 3702 3703 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3704 if self._match(TokenType.L_PAREN): 3705 grouping_set = self._parse_csv(self._parse_column) 3706 self._match_r_paren() 3707 return self.expression(exp.Tuple, expressions=grouping_set) 3708 3709 return self._parse_column() 3710 3711 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3712 if not skip_having_token and not self._match(TokenType.HAVING): 3713 return None 3714 return self.expression(exp.Having, this=self._parse_conjunction()) 3715 3716 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3717 if not self._match(TokenType.QUALIFY): 3718 return None 3719 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3720 3721 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3722 if skip_start_token: 3723 start = None 3724 elif self._match(TokenType.START_WITH): 3725 start = self._parse_conjunction() 3726 else: 
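# Usage sketch for the hierarchical-query parsing below (Oracle-style
# START WITH / CONNECT BY; the dialect choice is an assumption):
#
#     import sqlglot
#     from sqlglot import exp
#
#     ast = sqlglot.parse_one(
#         "SELECT employee_id FROM employees "
#         "START WITH manager_id IS NULL "
#         "CONNECT BY PRIOR employee_id = manager_id",
#         read="oracle",
#     )
#     connect = ast.find(exp.Connect)
#     # connect.args["start"] is the START WITH condition; PRIOR is handled
#     # via the temporary NO_PAREN_FUNCTION_PARSERS entry registered below.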
3727 return None 3728 3729 self._match(TokenType.CONNECT_BY) 3730 nocycle = self._match_text_seq("NOCYCLE") 3731 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3732 exp.Prior, this=self._parse_bitwise() 3733 ) 3734 connect = self._parse_conjunction() 3735 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3736 3737 if not start and self._match(TokenType.START_WITH): 3738 start = self._parse_conjunction() 3739 3740 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3741 3742 def _parse_name_as_expression(self) -> exp.Alias: 3743 return self.expression( 3744 exp.Alias, 3745 alias=self._parse_id_var(any_token=True), 3746 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 3747 ) 3748 3749 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3750 if self._match_text_seq("INTERPOLATE"): 3751 return self._parse_wrapped_csv(self._parse_name_as_expression) 3752 return None 3753 3754 def _parse_order( 3755 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3756 ) -> t.Optional[exp.Expression]: 3757 siblings = None 3758 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3759 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3760 return this 3761 3762 siblings = True 3763 3764 return self.expression( 3765 exp.Order, 3766 this=this, 3767 expressions=self._parse_csv(self._parse_ordered), 3768 interpolate=self._parse_interpolate(), 3769 siblings=siblings, 3770 ) 3771 3772 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3773 if not self._match(token): 3774 return None 3775 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3776 3777 def _parse_ordered( 3778 self, parse_method: t.Optional[t.Callable] = None 3779 ) -> t.Optional[exp.Ordered]: 3780 this = parse_method() if parse_method else self._parse_conjunction() 3781 if not this: 3782 return None 3783 3784 asc = self._match(TokenType.ASC) 3785 desc = self._match(TokenType.DESC) or (asc and False) 3786 3787 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3788 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3789 3790 nulls_first = is_nulls_first or False 3791 explicitly_null_ordered = is_nulls_first or is_nulls_last 3792 3793 if ( 3794 not explicitly_null_ordered 3795 and ( 3796 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3797 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3798 ) 3799 and self.dialect.NULL_ORDERING != "nulls_are_last" 3800 ): 3801 nulls_first = True 3802 3803 if self._match_text_seq("WITH", "FILL"): 3804 with_fill = self.expression( 3805 exp.WithFill, 3806 **{ # type: ignore 3807 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3808 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3809 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3810 }, 3811 ) 3812 else: 3813 with_fill = None 3814 3815 return self.expression( 3816 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3817 ) 3818 3819 def _parse_limit( 3820 self, 3821 this: t.Optional[exp.Expression] = None, 3822 top: bool = False, 3823 skip_limit_token: bool = False, 3824 ) -> t.Optional[exp.Expression]: 3825 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 3826 comments = self._prev_comments 3827 if top: 3828 limit_paren = self._match(TokenType.L_PAREN) 3829 expression = self._parse_term() if limit_paren else self._parse_number() 3830 3831 if limit_paren: 3832 
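# Usage sketch for _parse_limit: T-SQL TOP and LIMIT both normalize to an
# exp.Limit node, which is what makes transpiling between the two styles
# possible (illustrative; output shown is approximate):
#
#     import sqlglot
#
#     sqlglot.transpile("SELECT TOP 5 name FROM t", read="tsql", write="mysql")[0]
#     # -> "SELECT name FROM t LIMIT 5"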
self._match_r_paren() 3833 else: 3834 expression = self._parse_term() 3835 3836 if self._match(TokenType.COMMA): 3837 offset = expression 3838 expression = self._parse_term() 3839 else: 3840 offset = None 3841 3842 limit_exp = self.expression( 3843 exp.Limit, 3844 this=this, 3845 expression=expression, 3846 offset=offset, 3847 comments=comments, 3848 expressions=self._parse_limit_by(), 3849 ) 3850 3851 return limit_exp 3852 3853 if self._match(TokenType.FETCH): 3854 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3855 direction = self._prev.text.upper() if direction else "FIRST" 3856 3857 count = self._parse_field(tokens=self.FETCH_TOKENS) 3858 percent = self._match(TokenType.PERCENT) 3859 3860 self._match_set((TokenType.ROW, TokenType.ROWS)) 3861 3862 only = self._match_text_seq("ONLY") 3863 with_ties = self._match_text_seq("WITH", "TIES") 3864 3865 if only and with_ties: 3866 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3867 3868 return self.expression( 3869 exp.Fetch, 3870 direction=direction, 3871 count=count, 3872 percent=percent, 3873 with_ties=with_ties, 3874 ) 3875 3876 return this 3877 3878 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3879 if not self._match(TokenType.OFFSET): 3880 return this 3881 3882 count = self._parse_term() 3883 self._match_set((TokenType.ROW, TokenType.ROWS)) 3884 3885 return self.expression( 3886 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3887 ) 3888 3889 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3890 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3891 3892 def _parse_locks(self) -> t.List[exp.Lock]: 3893 locks = [] 3894 while True: 3895 if self._match_text_seq("FOR", "UPDATE"): 3896 update = True 3897 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3898 "LOCK", "IN", "SHARE", "MODE" 3899 ): 3900 update = False 3901 else: 3902 break 3903 3904 expressions = None 3905 if self._match_text_seq("OF"): 3906 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3907 3908 wait: t.Optional[bool | exp.Expression] = None 3909 if self._match_text_seq("NOWAIT"): 3910 wait = True 3911 elif self._match_text_seq("WAIT"): 3912 wait = self._parse_primary() 3913 elif self._match_text_seq("SKIP", "LOCKED"): 3914 wait = False 3915 3916 locks.append( 3917 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3918 ) 3919 3920 return locks 3921 3922 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3923 while this and self._match_set(self.SET_OPERATIONS): 3924 token_type = self._prev.token_type 3925 3926 if token_type == TokenType.UNION: 3927 operation = exp.Union 3928 elif token_type == TokenType.EXCEPT: 3929 operation = exp.Except 3930 else: 3931 operation = exp.Intersect 3932 3933 comments = self._prev.comments 3934 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3935 by_name = self._match_text_seq("BY", "NAME") 3936 expression = self._parse_select(nested=True, parse_set_operation=False) 3937 3938 this = self.expression( 3939 operation, 3940 comments=comments, 3941 this=this, 3942 distinct=distinct, 3943 by_name=by_name, 3944 expression=expression, 3945 ) 3946 3947 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3948 expression = this.expression 3949 3950 if expression: 3951 for arg in self.UNION_MODIFIERS: 3952 expr = expression.args.get(arg) 3953 
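# Usage sketch for _parse_set_operations (illustrative):
#
#     import sqlglot
#     from sqlglot import exp
#
#     ast = sqlglot.parse_one("SELECT a FROM x UNION ALL SELECT a FROM y")
#     # isinstance(ast, exp.Union) -> True; ast.args["distinct"] is False
#     # here, because ALL was matched instead of DISTINCT (see above).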
if expr: 3954 this.set(arg, expr.pop()) 3955 3956 return this 3957 3958 def _parse_expression(self) -> t.Optional[exp.Expression]: 3959 return self._parse_alias(self._parse_conjunction()) 3960 3961 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3962 this = self._parse_equality() 3963 3964 if self._match(TokenType.COLON_EQ): 3965 this = self.expression( 3966 exp.PropertyEQ, 3967 this=this, 3968 comments=self._prev_comments, 3969 expression=self._parse_conjunction(), 3970 ) 3971 3972 while self._match_set(self.CONJUNCTION): 3973 this = self.expression( 3974 self.CONJUNCTION[self._prev.token_type], 3975 this=this, 3976 comments=self._prev_comments, 3977 expression=self._parse_equality(), 3978 ) 3979 return this 3980 3981 def _parse_equality(self) -> t.Optional[exp.Expression]: 3982 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3983 3984 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3985 return self._parse_tokens(self._parse_range, self.COMPARISON) 3986 3987 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3988 this = this or self._parse_bitwise() 3989 negate = self._match(TokenType.NOT) 3990 3991 if self._match_set(self.RANGE_PARSERS): 3992 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3993 if not expression: 3994 return this 3995 3996 this = expression 3997 elif self._match(TokenType.ISNULL): 3998 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3999 4000 # Postgres supports ISNULL and NOTNULL for conditions. 4001 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4002 if self._match(TokenType.NOTNULL): 4003 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4004 this = self.expression(exp.Not, this=this) 4005 4006 if negate: 4007 this = self.expression(exp.Not, this=this) 4008 4009 if self._match(TokenType.IS): 4010 this = self._parse_is(this) 4011 4012 return this 4013 4014 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4015 index = self._index - 1 4016 negate = self._match(TokenType.NOT) 4017 4018 if self._match_text_seq("DISTINCT", "FROM"): 4019 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4020 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4021 4022 expression = self._parse_null() or self._parse_boolean() 4023 if not expression: 4024 self._retreat(index) 4025 return None 4026 4027 this = self.expression(exp.Is, this=this, expression=expression) 4028 return self.expression(exp.Not, this=this) if negate else this 4029 4030 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4031 unnest = self._parse_unnest(with_alias=False) 4032 if unnest: 4033 this = self.expression(exp.In, this=this, unnest=unnest) 4034 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4035 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4036 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4037 4038 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4039 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4040 else: 4041 this = self.expression(exp.In, this=this, expressions=expressions) 4042 4043 if matched_l_paren: 4044 self._match_r_paren(this) 4045 elif not self._match(TokenType.R_BRACKET, expression=this): 4046 self.raise_error("Expecting ]") 4047 else: 4048 this = self.expression(exp.In, this=this, field=self._parse_field()) 4049 4050 return this 
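# Usage sketch for the range parsers in this area (_parse_in above,
# _parse_between below); illustrative only:
#
#     import sqlglot
#     from sqlglot import exp
#
#     ast = sqlglot.parse_one("SELECT * FROM t WHERE x IN (1, 2) AND y BETWEEN 0 AND 9")
#     in_node = ast.find(exp.In)         # in_node.expressions -> the literals 1, 2
#     between = ast.find(exp.Between)    # between.args["low"] / between.args["high"]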
4051 4052 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4053 low = self._parse_bitwise() 4054 self._match(TokenType.AND) 4055 high = self._parse_bitwise() 4056 return self.expression(exp.Between, this=this, low=low, high=high) 4057 4058 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4059 if not self._match(TokenType.ESCAPE): 4060 return this 4061 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4062 4063 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4064 index = self._index 4065 4066 if not self._match(TokenType.INTERVAL) and match_interval: 4067 return None 4068 4069 if self._match(TokenType.STRING, advance=False): 4070 this = self._parse_primary() 4071 else: 4072 this = self._parse_term() 4073 4074 if not this or ( 4075 isinstance(this, exp.Column) 4076 and not this.table 4077 and not this.this.quoted 4078 and this.name.upper() == "IS" 4079 ): 4080 self._retreat(index) 4081 return None 4082 4083 unit = self._parse_function() or ( 4084 not self._match(TokenType.ALIAS, advance=False) 4085 and self._parse_var(any_token=True, upper=True) 4086 ) 4087 4088 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4089 # each INTERVAL expression into this canonical form so it's easy to transpile 4090 if this and this.is_number: 4091 this = exp.Literal.string(this.name) 4092 elif this and this.is_string: 4093 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4094 if len(parts) == 1: 4095 if unit: 4096 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4097 self._retreat(self._index - 1) 4098 4099 this = exp.Literal.string(parts[0][0]) 4100 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4101 4102 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4103 unit = self.expression( 4104 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4105 ) 4106 4107 interval = self.expression(exp.Interval, this=this, unit=unit) 4108 4109 index = self._index 4110 self._match(TokenType.PLUS) 4111 4112 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 4113 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4114 return self.expression( 4115 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4116 ) 4117 4118 self._retreat(index) 4119 return interval 4120 4121 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4122 this = self._parse_term() 4123 4124 while True: 4125 if self._match_set(self.BITWISE): 4126 this = self.expression( 4127 self.BITWISE[self._prev.token_type], 4128 this=this, 4129 expression=self._parse_term(), 4130 ) 4131 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4132 this = self.expression( 4133 exp.DPipe, 4134 this=this, 4135 expression=self._parse_term(), 4136 safe=not self.dialect.STRICT_STRING_CONCAT, 4137 ) 4138 elif self._match(TokenType.DQMARK): 4139 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 4140 elif self._match_pair(TokenType.LT, TokenType.LT): 4141 this = self.expression( 4142 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4143 ) 4144 elif self._match_pair(TokenType.GT, TokenType.GT): 4145 this = self.expression( 4146 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4147 ) 4148 else: 4149 break 4150 4151 return this 4152 4153 def _parse_term(self) -> t.Optional[exp.Expression]: 4154 return self._parse_tokens(self._parse_factor, self.TERM) 4155 4156 def _parse_factor(self) -> t.Optional[exp.Expression]: 4157 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4158 this = parse_method() 4159 4160 while self._match_set(self.FACTOR): 4161 this = self.expression( 4162 self.FACTOR[self._prev.token_type], 4163 this=this, 4164 comments=self._prev_comments, 4165 expression=parse_method(), 4166 ) 4167 if isinstance(this, exp.Div): 4168 this.args["typed"] = self.dialect.TYPED_DIVISION 4169 this.args["safe"] = self.dialect.SAFE_DIVISION 4170 4171 return this 4172 4173 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4174 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4175 4176 def _parse_unary(self) -> t.Optional[exp.Expression]: 4177 if self._match_set(self.UNARY_PARSERS): 4178 return self.UNARY_PARSERS[self._prev.token_type](self) 4179 return self._parse_at_time_zone(self._parse_type()) 4180 4181 def _parse_type( 4182 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4183 ) -> t.Optional[exp.Expression]: 4184 interval = parse_interval and self._parse_interval() 4185 if interval: 4186 return interval 4187 4188 index = self._index 4189 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4190 4191 if data_type: 4192 index2 = self._index 4193 this = self._parse_primary() 4194 4195 if isinstance(this, exp.Literal): 4196 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4197 if parser: 4198 return parser(self, this, data_type) 4199 4200 return self.expression(exp.Cast, this=this, to=data_type) 4201 4202 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4203 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4204 # 4205 # If the index difference here is greater than 1, that means the parser itself must have 4206 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 
4207 # 4208 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4209 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4210 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4211 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 4212 # 4213 # In these cases, we don't really want to return the converted type, but instead retreat 4214 # and try to parse a Column or Identifier in the section below. 4215 if data_type.expressions and index2 - index > 1: 4216 self._retreat(index2) 4217 return self._parse_column_ops(data_type) 4218 4219 self._retreat(index) 4220 4221 if fallback_to_identifier: 4222 return self._parse_id_var() 4223 4224 this = self._parse_column() 4225 return this and self._parse_column_ops(this) 4226 4227 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4228 this = self._parse_type() 4229 if not this: 4230 return None 4231 4232 if isinstance(this, exp.Column) and not this.table: 4233 this = exp.var(this.name.upper()) 4234 4235 return self.expression( 4236 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4237 ) 4238 4239 def _parse_types( 4240 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4241 ) -> t.Optional[exp.Expression]: 4242 index = self._index 4243 4244 this: t.Optional[exp.Expression] = None 4245 prefix = self._match_text_seq("SYSUDTLIB", ".") 4246 4247 if not self._match_set(self.TYPE_TOKENS): 4248 identifier = allow_identifiers and self._parse_id_var( 4249 any_token=False, tokens=(TokenType.VAR,) 4250 ) 4251 if identifier: 4252 tokens = self.dialect.tokenize(identifier.name) 4253 4254 if len(tokens) != 1: 4255 self.raise_error("Unexpected identifier", self._prev) 4256 4257 if tokens[0].token_type in self.TYPE_TOKENS: 4258 self._prev = tokens[0] 4259 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4260 type_name = identifier.name 4261 4262 while self._match(TokenType.DOT): 4263 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4264 4265 this = exp.DataType.build(type_name, udt=True) 4266 else: 4267 self._retreat(self._index - 1) 4268 return None 4269 else: 4270 return None 4271 4272 type_token = self._prev.token_type 4273 4274 if type_token == TokenType.PSEUDO_TYPE: 4275 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4276 4277 if type_token == TokenType.OBJECT_IDENTIFIER: 4278 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4279 4280 nested = type_token in self.NESTED_TYPE_TOKENS 4281 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4282 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4283 expressions = None 4284 maybe_func = False 4285 4286 if self._match(TokenType.L_PAREN): 4287 if is_struct: 4288 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4289 elif nested: 4290 expressions = self._parse_csv( 4291 lambda: self._parse_types( 4292 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4293 ) 4294 ) 4295 elif type_token in self.ENUM_TYPE_TOKENS: 4296 expressions = self._parse_csv(self._parse_equality) 4297 elif is_aggregate: 4298 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4299 any_token=False, tokens=(TokenType.VAR,) 4300 ) 4301 if not func_or_ident or not self._match(TokenType.COMMA): 4302 return None 4303 expressions = self._parse_csv( 4304 lambda: self._parse_types( 4305 check_func=check_func,
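# Usage sketch for _parse_types (illustrative): parameterized types keep
# their arguments as DataTypeParam expressions.
#
#     import sqlglot
#     from sqlglot import exp
#
#     dt = sqlglot.parse_one("SELECT CAST(x AS DECIMAL(38, 0)) FROM t").find(exp.DataType)
#     # dt.this -> exp.DataType.Type.DECIMAL; dt.expressions -> the (38, 0) params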
schema=schema, allow_identifiers=allow_identifiers 4306 ) 4307 ) 4308 expressions.insert(0, func_or_ident) 4309 else: 4310 expressions = self._parse_csv(self._parse_type_size) 4311 4312 if not expressions or not self._match(TokenType.R_PAREN): 4313 self._retreat(index) 4314 return None 4315 4316 maybe_func = True 4317 4318 values: t.Optional[t.List[exp.Expression]] = None 4319 4320 if nested and self._match(TokenType.LT): 4321 if is_struct: 4322 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4323 else: 4324 expressions = self._parse_csv( 4325 lambda: self._parse_types( 4326 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4327 ) 4328 ) 4329 4330 if not self._match(TokenType.GT): 4331 self.raise_error("Expecting >") 4332 4333 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4334 values = self._parse_csv(self._parse_conjunction) 4335 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4336 4337 if type_token in self.TIMESTAMPS: 4338 if self._match_text_seq("WITH", "TIME", "ZONE"): 4339 maybe_func = False 4340 tz_type = ( 4341 exp.DataType.Type.TIMETZ 4342 if type_token in self.TIMES 4343 else exp.DataType.Type.TIMESTAMPTZ 4344 ) 4345 this = exp.DataType(this=tz_type, expressions=expressions) 4346 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4347 maybe_func = False 4348 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4349 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4350 maybe_func = False 4351 elif type_token == TokenType.INTERVAL: 4352 unit = self._parse_var(upper=True) 4353 if unit: 4354 if self._match_text_seq("TO"): 4355 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4356 4357 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4358 else: 4359 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4360 4361 if maybe_func and check_func: 4362 index2 = self._index 4363 peek = self._parse_string() 4364 4365 if not peek: 4366 self._retreat(index) 4367 return None 4368 4369 self._retreat(index2) 4370 4371 if not this: 4372 if self._match_text_seq("UNSIGNED"): 4373 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4374 if not unsigned_type_token: 4375 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4376 4377 type_token = unsigned_type_token or type_token 4378 4379 this = exp.DataType( 4380 this=exp.DataType.Type[type_token.value], 4381 expressions=expressions, 4382 nested=nested, 4383 values=values, 4384 prefix=prefix, 4385 ) 4386 elif expressions: 4387 this.set("expressions", expressions) 4388 4389 index = self._index 4390 4391 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4392 matched_array = self._match(TokenType.ARRAY) 4393 4394 while self._curr: 4395 matched_l_bracket = self._match(TokenType.L_BRACKET) 4396 if not matched_l_bracket and not matched_array: 4397 break 4398 4399 matched_array = False 4400 values = self._parse_csv(self._parse_conjunction) or None 4401 if values and not schema: 4402 self._retreat(index) 4403 break 4404 4405 this = exp.DataType( 4406 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4407 ) 4408 self._match(TokenType.R_BRACKET) 4409 4410 if self.TYPE_CONVERTER and isinstance(this.this, exp.DataType.Type): 4411 converter = self.TYPE_CONVERTER.get(this.this) 4412 if converter: 4413 this = converter(t.cast(exp.DataType, this)) 4414 4415 return this 4416 4417 def 
_parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4418 index = self._index 4419 this = ( 4420 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4421 or self._parse_id_var() 4422 ) 4423 self._match(TokenType.COLON) 4424 4425 if ( 4426 type_required 4427 and not isinstance(this, exp.DataType) 4428 and not self._match_set(self.TYPE_TOKENS, advance=False) 4429 ): 4430 self._retreat(index) 4431 return self._parse_types() 4432 4433 return self._parse_column_def(this) 4434 4435 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4436 if not self._match_text_seq("AT", "TIME", "ZONE"): 4437 return this 4438 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4439 4440 def _parse_column(self) -> t.Optional[exp.Expression]: 4441 this = self._parse_column_reference() 4442 return self._parse_column_ops(this) if this else self._parse_bracket(this) 4443 4444 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4445 this = self._parse_field() 4446 if ( 4447 not this 4448 and self._match(TokenType.VALUES, advance=False) 4449 and self.VALUES_FOLLOWED_BY_PAREN 4450 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4451 ): 4452 this = self._parse_id_var() 4453 4454 if isinstance(this, exp.Identifier): 4455 # We bubble up comments from the Identifier to the Column 4456 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4457 4458 return this 4459 4460 def _parse_colon_as_json_extract( 4461 self, this: t.Optional[exp.Expression] 4462 ) -> t.Optional[exp.Expression]: 4463 casts = [] 4464 json_path = [] 4465 4466 while self._match(TokenType.COLON): 4467 start_index = self._index 4468 path = self._parse_column_ops(self._parse_field(any_token=True)) 4469 4470 # The cast :: operator has a lower precedence than the extraction operator :, so 4471 # we rearrange the AST appropriately to avoid casting the JSON path 4472 while isinstance(path, exp.Cast): 4473 casts.append(path.to) 4474 path = path.this 4475 4476 if casts: 4477 dcolon_offset = next( 4478 i 4479 for i, t in enumerate(self._tokens[start_index:]) 4480 if t.token_type == TokenType.DCOLON 4481 ) 4482 end_token = self._tokens[start_index + dcolon_offset - 1] 4483 else: 4484 end_token = self._prev 4485 4486 if path: 4487 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 4488 4489 if json_path: 4490 this = self.expression( 4491 exp.JSONExtract, 4492 this=this, 4493 expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))), 4494 ) 4495 4496 while casts: 4497 this = self.expression(exp.Cast, this=this, to=casts.pop()) 4498 4499 return this 4500 4501 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4502 this = self._parse_bracket(this) 4503 4504 while self._match_set(self.COLUMN_OPERATORS): 4505 op_token = self._prev.token_type 4506 op = self.COLUMN_OPERATORS.get(op_token) 4507 4508 if op_token == TokenType.DCOLON: 4509 field = self._parse_types() 4510 if not field: 4511 self.raise_error("Expected type") 4512 elif op and self._curr: 4513 field = self._parse_column_reference() 4514 else: 4515 field = self._parse_field(any_token=True, anonymous_func=True) 4516 4517 if isinstance(field, exp.Func) and this: 4518 # bigquery allows function calls like x.y.count(...) 4519 # SAFE.SUBSTR(...) 
4520 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4521 this = exp.replace_tree( 4522 this, 4523 lambda n: ( 4524 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4525 if n.table 4526 else n.this 4527 ) 4528 if isinstance(n, exp.Column) 4529 else n, 4530 ) 4531 4532 if op: 4533 this = op(self, this, field) 4534 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4535 this = self.expression( 4536 exp.Column, 4537 this=field, 4538 table=this.this, 4539 db=this.args.get("table"), 4540 catalog=this.args.get("db"), 4541 ) 4542 else: 4543 this = self.expression(exp.Dot, this=this, expression=field) 4544 4545 this = self._parse_bracket(this) 4546 4547 return self._parse_colon_as_json_extract(this) if self.COLON_IS_JSON_EXTRACT else this 4548 4549 def _parse_primary(self) -> t.Optional[exp.Expression]: 4550 if self._match_set(self.PRIMARY_PARSERS): 4551 token_type = self._prev.token_type 4552 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4553 4554 if token_type == TokenType.STRING: 4555 expressions = [primary] 4556 while self._match(TokenType.STRING): 4557 expressions.append(exp.Literal.string(self._prev.text)) 4558 4559 if len(expressions) > 1: 4560 return self.expression(exp.Concat, expressions=expressions) 4561 4562 return primary 4563 4564 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4565 return exp.Literal.number(f"0.{self._prev.text}") 4566 4567 if self._match(TokenType.L_PAREN): 4568 comments = self._prev_comments 4569 query = self._parse_select() 4570 4571 if query: 4572 expressions = [query] 4573 else: 4574 expressions = self._parse_expressions() 4575 4576 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4577 4578 if not this and self._match(TokenType.R_PAREN, advance=False): 4579 this = self.expression(exp.Tuple) 4580 elif isinstance(this, exp.UNWRAPPED_QUERIES): 4581 this = self._parse_subquery(this=this, parse_alias=False) 4582 elif isinstance(this, exp.Subquery): 4583 this = self._parse_subquery( 4584 this=self._parse_set_operations(this), parse_alias=False 4585 ) 4586 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 4587 this = self.expression(exp.Tuple, expressions=expressions) 4588 else: 4589 this = self.expression(exp.Paren, this=this) 4590 4591 if this: 4592 this.add_comments(comments) 4593 4594 self._match_r_paren(expression=this) 4595 return this 4596 4597 return None 4598 4599 def _parse_field( 4600 self, 4601 any_token: bool = False, 4602 tokens: t.Optional[t.Collection[TokenType]] = None, 4603 anonymous_func: bool = False, 4604 ) -> t.Optional[exp.Expression]: 4605 if anonymous_func: 4606 field = ( 4607 self._parse_function(anonymous=anonymous_func, any_token=any_token) 4608 or self._parse_primary() 4609 ) 4610 else: 4611 field = self._parse_primary() or self._parse_function( 4612 anonymous=anonymous_func, any_token=any_token 4613 ) 4614 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 4615 4616 def _parse_function( 4617 self, 4618 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4619 anonymous: bool = False, 4620 optional_parens: bool = True, 4621 any_token: bool = False, 4622 ) -> t.Optional[exp.Expression]: 4623 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4624 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4625 fn_syntax = False 4626 if ( 4627 self._match(TokenType.L_BRACE, advance=False) 4628 and self._next 4629 and 
self._next.text.upper() == "FN" 4630 ): 4631 self._advance(2) 4632 fn_syntax = True 4633 4634 func = self._parse_function_call( 4635 functions=functions, 4636 anonymous=anonymous, 4637 optional_parens=optional_parens, 4638 any_token=any_token, 4639 ) 4640 4641 if fn_syntax: 4642 self._match(TokenType.R_BRACE) 4643 4644 return func 4645 4646 def _parse_function_call( 4647 self, 4648 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4649 anonymous: bool = False, 4650 optional_parens: bool = True, 4651 any_token: bool = False, 4652 ) -> t.Optional[exp.Expression]: 4653 if not self._curr: 4654 return None 4655 4656 comments = self._curr.comments 4657 token_type = self._curr.token_type 4658 this = self._curr.text 4659 upper = this.upper() 4660 4661 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4662 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4663 self._advance() 4664 return self._parse_window(parser(self)) 4665 4666 if not self._next or self._next.token_type != TokenType.L_PAREN: 4667 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4668 self._advance() 4669 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4670 4671 return None 4672 4673 if any_token: 4674 if token_type in self.RESERVED_TOKENS: 4675 return None 4676 elif token_type not in self.FUNC_TOKENS: 4677 return None 4678 4679 self._advance(2) 4680 4681 parser = self.FUNCTION_PARSERS.get(upper) 4682 if parser and not anonymous: 4683 this = parser(self) 4684 else: 4685 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4686 4687 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4688 this = self.expression(subquery_predicate, this=self._parse_select()) 4689 self._match_r_paren() 4690 return this 4691 4692 if functions is None: 4693 functions = self.FUNCTIONS 4694 4695 function = functions.get(upper) 4696 4697 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4698 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4699 4700 if alias: 4701 args = self._kv_to_prop_eq(args) 4702 4703 if function and not anonymous: 4704 if "dialect" in function.__code__.co_varnames: 4705 func = function(args, dialect=self.dialect) 4706 else: 4707 func = function(args) 4708 4709 func = self.validate_expression(func, args) 4710 if not self.dialect.NORMALIZE_FUNCTIONS: 4711 func.meta["name"] = this 4712 4713 this = func 4714 else: 4715 if token_type == TokenType.IDENTIFIER: 4716 this = exp.Identifier(this=this, quoted=True) 4717 this = self.expression(exp.Anonymous, this=this, expressions=args) 4718 4719 if isinstance(this, exp.Expression): 4720 this.add_comments(comments) 4721 4722 self._match_r_paren(this) 4723 return self._parse_window(this) 4724 4725 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4726 transformed = [] 4727 4728 for e in expressions: 4729 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4730 if isinstance(e, exp.Alias): 4731 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4732 4733 if not isinstance(e, exp.PropertyEQ): 4734 e = self.expression( 4735 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 4736 ) 4737 4738 if isinstance(e.this, exp.Column): 4739 e.this.replace(e.this.this) 4740 4741 transformed.append(e) 4742 4743 return transformed 4744 4745 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4746 return self._parse_column_def(self._parse_id_var()) 4747 4748 def _parse_user_defined_function( 
4749 self, kind: t.Optional[TokenType] = None 4750 ) -> t.Optional[exp.Expression]: 4751 this = self._parse_id_var() 4752 4753 while self._match(TokenType.DOT): 4754 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4755 4756 if not self._match(TokenType.L_PAREN): 4757 return this 4758 4759 expressions = self._parse_csv(self._parse_function_parameter) 4760 self._match_r_paren() 4761 return self.expression( 4762 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4763 ) 4764 4765 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4766 literal = self._parse_primary() 4767 if literal: 4768 return self.expression(exp.Introducer, this=token.text, expression=literal) 4769 4770 return self.expression(exp.Identifier, this=token.text) 4771 4772 def _parse_session_parameter(self) -> exp.SessionParameter: 4773 kind = None 4774 this = self._parse_id_var() or self._parse_primary() 4775 4776 if this and self._match(TokenType.DOT): 4777 kind = this.name 4778 this = self._parse_var() or self._parse_primary() 4779 4780 return self.expression(exp.SessionParameter, this=this, kind=kind) 4781 4782 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 4783 return self._parse_id_var() 4784 4785 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4786 index = self._index 4787 4788 if self._match(TokenType.L_PAREN): 4789 expressions = t.cast( 4790 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 4791 ) 4792 4793 if not self._match(TokenType.R_PAREN): 4794 self._retreat(index) 4795 else: 4796 expressions = [self._parse_lambda_arg()] 4797 4798 if self._match_set(self.LAMBDAS): 4799 return self.LAMBDAS[self._prev.token_type](self, expressions) 4800 4801 self._retreat(index) 4802 4803 this: t.Optional[exp.Expression] 4804 4805 if self._match(TokenType.DISTINCT): 4806 this = self.expression( 4807 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 4808 ) 4809 else: 4810 this = self._parse_select_or_expression(alias=alias) 4811 4812 return self._parse_limit( 4813 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4814 ) 4815 4816 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4817 index = self._index 4818 if not self._match(TokenType.L_PAREN): 4819 return this 4820 4821 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 4822 # expr can be of both types 4823 if self._match_set(self.SELECT_START_TOKENS): 4824 self._retreat(index) 4825 return this 4826 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4827 self._match_r_paren() 4828 return self.expression(exp.Schema, this=this, expressions=args) 4829 4830 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4831 return self._parse_column_def(self._parse_field(any_token=True)) 4832 4833 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4834 # column defs are not really columns, they're identifiers 4835 if isinstance(this, exp.Column): 4836 this = this.this 4837 4838 kind = self._parse_types(schema=True) 4839 4840 if self._match_text_seq("FOR", "ORDINALITY"): 4841 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4842 4843 constraints: t.List[exp.Expression] = [] 4844 4845 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 4846 ("ALIAS", "MATERIALIZED") 4847 ): 4848 persisted = self._prev.text.upper() == "MATERIALIZED" 4849 constraints.append( 4850 self.expression( 4851 exp.ComputedColumnConstraint, 4852 this=self._parse_conjunction(), 4853 persisted=persisted or self._match_text_seq("PERSISTED"), 4854 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4855 ) 4856 ) 4857 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4858 self._match(TokenType.ALIAS) 4859 constraints.append( 4860 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4861 ) 4862 4863 while True: 4864 constraint = self._parse_column_constraint() 4865 if not constraint: 4866 break 4867 constraints.append(constraint) 4868 4869 if not kind and not constraints: 4870 return this 4871 4872 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4873 4874 def _parse_auto_increment( 4875 self, 4876 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4877 start = None 4878 increment = None 4879 4880 if self._match(TokenType.L_PAREN, advance=False): 4881 args = self._parse_wrapped_csv(self._parse_bitwise) 4882 start = seq_get(args, 0) 4883 increment = seq_get(args, 1) 4884 elif self._match_text_seq("START"): 4885 start = self._parse_bitwise() 4886 self._match_text_seq("INCREMENT") 4887 increment = self._parse_bitwise() 4888 4889 if start and increment: 4890 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4891 4892 return exp.AutoIncrementColumnConstraint() 4893 4894 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4895 if not self._match_text_seq("REFRESH"): 4896 self._retreat(self._index - 1) 4897 return None 4898 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4899 4900 def _parse_compress(self) -> exp.CompressColumnConstraint: 4901 if self._match(TokenType.L_PAREN, advance=False): 4902 return self.expression( 4903 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4904 ) 4905 4906 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4907 4908 def _parse_generated_as_identity( 4909 self, 4910 ) -> ( 4911 exp.GeneratedAsIdentityColumnConstraint 4912 | exp.ComputedColumnConstraint 4913 | exp.GeneratedAsRowColumnConstraint 4914 ): 4915 if self._match_text_seq("BY", "DEFAULT"): 4916 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4917 this = self.expression( 4918 
exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4919 ) 4920 else: 4921 self._match_text_seq("ALWAYS") 4922 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4923 4924 self._match(TokenType.ALIAS) 4925 4926 if self._match_text_seq("ROW"): 4927 start = self._match_text_seq("START") 4928 if not start: 4929 self._match(TokenType.END) 4930 hidden = self._match_text_seq("HIDDEN") 4931 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4932 4933 identity = self._match_text_seq("IDENTITY") 4934 4935 if self._match(TokenType.L_PAREN): 4936 if self._match(TokenType.START_WITH): 4937 this.set("start", self._parse_bitwise()) 4938 if self._match_text_seq("INCREMENT", "BY"): 4939 this.set("increment", self._parse_bitwise()) 4940 if self._match_text_seq("MINVALUE"): 4941 this.set("minvalue", self._parse_bitwise()) 4942 if self._match_text_seq("MAXVALUE"): 4943 this.set("maxvalue", self._parse_bitwise()) 4944 4945 if self._match_text_seq("CYCLE"): 4946 this.set("cycle", True) 4947 elif self._match_text_seq("NO", "CYCLE"): 4948 this.set("cycle", False) 4949 4950 if not identity: 4951 this.set("expression", self._parse_range()) 4952 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4953 args = self._parse_csv(self._parse_bitwise) 4954 this.set("start", seq_get(args, 0)) 4955 this.set("increment", seq_get(args, 1)) 4956 4957 self._match_r_paren() 4958 4959 return this 4960 4961 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4962 self._match_text_seq("LENGTH") 4963 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4964 4965 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 4966 if self._match_text_seq("NULL"): 4967 return self.expression(exp.NotNullColumnConstraint) 4968 if self._match_text_seq("CASESPECIFIC"): 4969 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4970 if self._match_text_seq("FOR", "REPLICATION"): 4971 return self.expression(exp.NotForReplicationColumnConstraint) 4972 return None 4973 4974 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4975 if self._match(TokenType.CONSTRAINT): 4976 this = self._parse_id_var() 4977 else: 4978 this = None 4979 4980 if self._match_texts(self.CONSTRAINT_PARSERS): 4981 return self.expression( 4982 exp.ColumnConstraint, 4983 this=this, 4984 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4985 ) 4986 4987 return this 4988 4989 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4990 if not self._match(TokenType.CONSTRAINT): 4991 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4992 4993 return self.expression( 4994 exp.Constraint, 4995 this=self._parse_id_var(), 4996 expressions=self._parse_unnamed_constraints(), 4997 ) 4998 4999 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5000 constraints = [] 5001 while True: 5002 constraint = self._parse_unnamed_constraint() or self._parse_function() 5003 if not constraint: 5004 break 5005 constraints.append(constraint) 5006 5007 return constraints 5008 5009 def _parse_unnamed_constraint( 5010 self, constraints: t.Optional[t.Collection[str]] = None 5011 ) -> t.Optional[exp.Expression]: 5012 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5013 constraints or self.CONSTRAINT_PARSERS 5014 ): 5015 return None 5016 5017 constraint = self._prev.text.upper() 5018 if constraint not in self.CONSTRAINT_PARSERS: 5019 
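# Usage sketch for the constraint parsers in this area (illustrative):
#
#     import sqlglot
#     from sqlglot import exp
#
#     ast = sqlglot.parse_one("CREATE TABLE t (id INT PRIMARY KEY, name TEXT NOT NULL)")
#     cols = list(ast.find_all(exp.ColumnDef))
#     # each ColumnDef carries its parsed constraints under
#     # cols[i].args["constraints"], e.g. a PrimaryKeyColumnConstraint for id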
self.raise_error(f"No parser found for schema constraint {constraint}.") 5020 5021 return self.CONSTRAINT_PARSERS[constraint](self) 5022 5023 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5024 self._match_text_seq("KEY") 5025 return self.expression( 5026 exp.UniqueColumnConstraint, 5027 this=self._parse_schema(self._parse_id_var(any_token=False)), 5028 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5029 on_conflict=self._parse_on_conflict(), 5030 ) 5031 5032 def _parse_key_constraint_options(self) -> t.List[str]: 5033 options = [] 5034 while True: 5035 if not self._curr: 5036 break 5037 5038 if self._match(TokenType.ON): 5039 action = None 5040 on = self._advance_any() and self._prev.text 5041 5042 if self._match_text_seq("NO", "ACTION"): 5043 action = "NO ACTION" 5044 elif self._match_text_seq("CASCADE"): 5045 action = "CASCADE" 5046 elif self._match_text_seq("RESTRICT"): 5047 action = "RESTRICT" 5048 elif self._match_pair(TokenType.SET, TokenType.NULL): 5049 action = "SET NULL" 5050 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5051 action = "SET DEFAULT" 5052 else: 5053 self.raise_error("Invalid key constraint") 5054 5055 options.append(f"ON {on} {action}") 5056 elif self._match_text_seq("NOT", "ENFORCED"): 5057 options.append("NOT ENFORCED") 5058 elif self._match_text_seq("DEFERRABLE"): 5059 options.append("DEFERRABLE") 5060 elif self._match_text_seq("INITIALLY", "DEFERRED"): 5061 options.append("INITIALLY DEFERRED") 5062 elif self._match_text_seq("NORELY"): 5063 options.append("NORELY") 5064 elif self._match_text_seq("MATCH", "FULL"): 5065 options.append("MATCH FULL") 5066 else: 5067 break 5068 5069 return options 5070 5071 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5072 if match and not self._match(TokenType.REFERENCES): 5073 return None 5074 5075 expressions = None 5076 this = self._parse_table(schema=True) 5077 options = self._parse_key_constraint_options() 5078 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5079 5080 def _parse_foreign_key(self) -> exp.ForeignKey: 5081 expressions = self._parse_wrapped_id_vars() 5082 reference = self._parse_references() 5083 options = {} 5084 5085 while self._match(TokenType.ON): 5086 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5087 self.raise_error("Expected DELETE or UPDATE") 5088 5089 kind = self._prev.text.lower() 5090 5091 if self._match_text_seq("NO", "ACTION"): 5092 action = "NO ACTION" 5093 elif self._match(TokenType.SET): 5094 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5095 action = "SET " + self._prev.text.upper() 5096 else: 5097 self._advance() 5098 action = self._prev.text.upper() 5099 5100 options[kind] = action 5101 5102 return self.expression( 5103 exp.ForeignKey, 5104 expressions=expressions, 5105 reference=reference, 5106 **options, # type: ignore 5107 ) 5108 5109 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5110 return self._parse_field() 5111 5112 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5113 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5114 self._retreat(self._index - 1) 5115 return None 5116 5117 id_vars = self._parse_wrapped_id_vars() 5118 return self.expression( 5119 exp.PeriodForSystemTimeConstraint, 5120 this=seq_get(id_vars, 0), 5121 expression=seq_get(id_vars, 1), 5122 ) 5123 5124 def _parse_primary_key( 5125 self, wrapped_optional: bool = False, in_props: bool = False 5126 ) -> 
exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5127 desc = ( 5128 self._match_set((TokenType.ASC, TokenType.DESC)) 5129 and self._prev.token_type == TokenType.DESC 5130 ) 5131 5132 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5133 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5134 5135 expressions = self._parse_wrapped_csv( 5136 self._parse_primary_key_part, optional=wrapped_optional 5137 ) 5138 options = self._parse_key_constraint_options() 5139 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5140 5141 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5142 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 5143 5144 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5145 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5146 return this 5147 5148 bracket_kind = self._prev.token_type 5149 expressions = self._parse_csv( 5150 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5151 ) 5152 5153 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5154 self.raise_error("Expected ]") 5155 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5156 self.raise_error("Expected }") 5157 5158 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5159 if bracket_kind == TokenType.L_BRACE: 5160 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5161 elif not this or this.name.upper() == "ARRAY": 5162 this = self.expression(exp.Array, expressions=expressions) 5163 else: 5164 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5165 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5166 5167 self._add_comments(this) 5168 return self._parse_bracket(this) 5169 5170 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5171 if self._match(TokenType.COLON): 5172 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 5173 return this 5174 5175 def _parse_case(self) -> t.Optional[exp.Expression]: 5176 ifs = [] 5177 default = None 5178 5179 comments = self._prev_comments 5180 expression = self._parse_conjunction() 5181 5182 while self._match(TokenType.WHEN): 5183 this = self._parse_conjunction() 5184 self._match(TokenType.THEN) 5185 then = self._parse_conjunction() 5186 ifs.append(self.expression(exp.If, this=this, true=then)) 5187 5188 if self._match(TokenType.ELSE): 5189 default = self._parse_conjunction() 5190 5191 if not self._match(TokenType.END): 5192 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5193 default = exp.column("interval") 5194 else: 5195 self.raise_error("Expected END after CASE", self._prev) 5196 5197 return self.expression( 5198 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5199 ) 5200 5201 def _parse_if(self) -> t.Optional[exp.Expression]: 5202 if self._match(TokenType.L_PAREN): 5203 args = self._parse_csv(self._parse_conjunction) 5204 this = self.validate_expression(exp.If.from_arg_list(args), args) 5205 self._match_r_paren() 5206 else: 5207 index = self._index - 1 5208 5209 if self.NO_PAREN_IF_COMMANDS and index == 0: 5210 return self._parse_as_command(self._prev) 5211 5212 condition = self._parse_conjunction() 5213 5214 if not condition: 5215 self._retreat(index) 5216 return None 5217 
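# Usage sketch for _parse_case / _parse_if (illustrative):
#
#     import sqlglot
#     from sqlglot import exp
#
#     case = sqlglot.parse_one(
#         "SELECT CASE WHEN x > 0 THEN 'pos' ELSE 'neg' END FROM t"
#     ).find(exp.Case)
#     # case.args["ifs"] -> list of exp.If branches; case.args["default"] -> ELSE arm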
5218 self._match(TokenType.THEN) 5219 true = self._parse_conjunction() 5220 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 5221 self._match(TokenType.END) 5222 this = self.expression(exp.If, this=condition, true=true, false=false) 5223 5224 return this 5225 5226 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5227 if not self._match_text_seq("VALUE", "FOR"): 5228 self._retreat(self._index - 1) 5229 return None 5230 5231 return self.expression( 5232 exp.NextValueFor, 5233 this=self._parse_column(), 5234 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5235 ) 5236 5237 def _parse_extract(self) -> exp.Extract: 5238 this = self._parse_function() or self._parse_var() or self._parse_type() 5239 5240 if self._match(TokenType.FROM): 5241 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5242 5243 if not self._match(TokenType.COMMA): 5244 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5245 5246 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5247 5248 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5249 this = self._parse_conjunction() 5250 5251 if not self._match(TokenType.ALIAS): 5252 if self._match(TokenType.COMMA): 5253 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5254 5255 self.raise_error("Expected AS after CAST") 5256 5257 fmt = None 5258 to = self._parse_types() 5259 5260 if self._match(TokenType.FORMAT): 5261 fmt_string = self._parse_string() 5262 fmt = self._parse_at_time_zone(fmt_string) 5263 5264 if not to: 5265 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5266 if to.this in exp.DataType.TEMPORAL_TYPES: 5267 this = self.expression( 5268 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5269 this=this, 5270 format=exp.Literal.string( 5271 format_time( 5272 fmt_string.this if fmt_string else "", 5273 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5274 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5275 ) 5276 ), 5277 ) 5278 5279 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5280 this.set("zone", fmt.args["zone"]) 5281 return this 5282 elif not to: 5283 self.raise_error("Expected TYPE after CAST") 5284 elif isinstance(to, exp.Identifier): 5285 to = exp.DataType.build(to.name, udt=True) 5286 elif to.this == exp.DataType.Type.CHAR: 5287 if self._match(TokenType.CHARACTER_SET): 5288 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5289 5290 return self.expression( 5291 exp.Cast if strict else exp.TryCast, 5292 this=this, 5293 to=to, 5294 format=fmt, 5295 safe=safe, 5296 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5297 ) 5298 5299 def _parse_string_agg(self) -> exp.Expression: 5300 if self._match(TokenType.DISTINCT): 5301 args: t.List[t.Optional[exp.Expression]] = [ 5302 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 5303 ] 5304 if self._match(TokenType.COMMA): 5305 args.extend(self._parse_csv(self._parse_conjunction)) 5306 else: 5307 args = self._parse_csv(self._parse_conjunction) # type: ignore 5308 5309 index = self._index 5310 if not self._match(TokenType.R_PAREN) and args: 5311 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5312 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... 
]] [LIMIT n]) 5313 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5314 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5315 5316 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5317 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5318 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5319 if not self._match_text_seq("WITHIN", "GROUP"): 5320 self._retreat(index) 5321 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5322 5323 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5324 order = self._parse_order(this=seq_get(args, 0)) 5325 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5326 5327 def _parse_convert( 5328 self, strict: bool, safe: t.Optional[bool] = None 5329 ) -> t.Optional[exp.Expression]: 5330 this = self._parse_bitwise() 5331 5332 if self._match(TokenType.USING): 5333 to: t.Optional[exp.Expression] = self.expression( 5334 exp.CharacterSet, this=self._parse_var() 5335 ) 5336 elif self._match(TokenType.COMMA): 5337 to = self._parse_types() 5338 else: 5339 to = None 5340 5341 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5342 5343 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5344 """ 5345 There are generally two variants of the DECODE function: 5346 5347 - DECODE(bin, charset) 5348 - DECODE(expression, search, result [, search, result] ... [, default]) 5349 5350 The second variant will always be parsed into a CASE expression. Note that NULL 5351 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5352 instead of relying on pattern matching. 
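        For example, DECODE(a, 1, 'one', NULL, 'none', 'other') is parsed into
        CASE WHEN a = 1 THEN 'one' WHEN a IS NULL THEN 'none' ELSE 'other' END.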
5353 """ 5354 args = self._parse_csv(self._parse_conjunction) 5355 5356 if len(args) < 3: 5357 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5358 5359 expression, *expressions = args 5360 if not expression: 5361 return None 5362 5363 ifs = [] 5364 for search, result in zip(expressions[::2], expressions[1::2]): 5365 if not search or not result: 5366 return None 5367 5368 if isinstance(search, exp.Literal): 5369 ifs.append( 5370 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5371 ) 5372 elif isinstance(search, exp.Null): 5373 ifs.append( 5374 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5375 ) 5376 else: 5377 cond = exp.or_( 5378 exp.EQ(this=expression.copy(), expression=search), 5379 exp.and_( 5380 exp.Is(this=expression.copy(), expression=exp.Null()), 5381 exp.Is(this=search.copy(), expression=exp.Null()), 5382 copy=False, 5383 ), 5384 copy=False, 5385 ) 5386 ifs.append(exp.If(this=cond, true=result)) 5387 5388 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5389 5390 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5391 self._match_text_seq("KEY") 5392 key = self._parse_column() 5393 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5394 self._match_text_seq("VALUE") 5395 value = self._parse_bitwise() 5396 5397 if not key and not value: 5398 return None 5399 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5400 5401 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5402 if not this or not self._match_text_seq("FORMAT", "JSON"): 5403 return this 5404 5405 return self.expression(exp.FormatJson, this=this) 5406 5407 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5408 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5409 for value in values: 5410 if self._match_text_seq(value, "ON", on): 5411 return f"{value} ON {on}" 5412 5413 return None 5414 5415 @t.overload 5416 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5417 5418 @t.overload 5419 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
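    # The overloads above only narrow the return type for type checkers:
    # agg=True produces exp.JSONObjectAgg (JSON_OBJECTAGG), agg=False exp.JSONObject.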
5420 5421 def _parse_json_object(self, agg=False): 5422 star = self._parse_star() 5423 expressions = ( 5424 [star] 5425 if star 5426 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5427 ) 5428 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5429 5430 unique_keys = None 5431 if self._match_text_seq("WITH", "UNIQUE"): 5432 unique_keys = True 5433 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5434 unique_keys = False 5435 5436 self._match_text_seq("KEYS") 5437 5438 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5439 self._parse_type() 5440 ) 5441 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5442 5443 return self.expression( 5444 exp.JSONObjectAgg if agg else exp.JSONObject, 5445 expressions=expressions, 5446 null_handling=null_handling, 5447 unique_keys=unique_keys, 5448 return_type=return_type, 5449 encoding=encoding, 5450 ) 5451 5452 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5453 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5454 if not self._match_text_seq("NESTED"): 5455 this = self._parse_id_var() 5456 kind = self._parse_types(allow_identifiers=False) 5457 nested = None 5458 else: 5459 this = None 5460 kind = None 5461 nested = True 5462 5463 path = self._match_text_seq("PATH") and self._parse_string() 5464 nested_schema = nested and self._parse_json_schema() 5465 5466 return self.expression( 5467 exp.JSONColumnDef, 5468 this=this, 5469 kind=kind, 5470 path=path, 5471 nested_schema=nested_schema, 5472 ) 5473 5474 def _parse_json_schema(self) -> exp.JSONSchema: 5475 self._match_text_seq("COLUMNS") 5476 return self.expression( 5477 exp.JSONSchema, 5478 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5479 ) 5480 5481 def _parse_json_table(self) -> exp.JSONTable: 5482 this = self._parse_format_json(self._parse_bitwise()) 5483 path = self._match(TokenType.COMMA) and self._parse_string() 5484 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5485 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5486 schema = self._parse_json_schema() 5487 5488 return exp.JSONTable( 5489 this=this, 5490 schema=schema, 5491 path=path, 5492 error_handling=error_handling, 5493 empty_handling=empty_handling, 5494 ) 5495 5496 def _parse_match_against(self) -> exp.MatchAgainst: 5497 expressions = self._parse_csv(self._parse_column) 5498 5499 self._match_text_seq(")", "AGAINST", "(") 5500 5501 this = self._parse_string() 5502 5503 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5504 modifier = "IN NATURAL LANGUAGE MODE" 5505 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5506 modifier = f"{modifier} WITH QUERY EXPANSION" 5507 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5508 modifier = "IN BOOLEAN MODE" 5509 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5510 modifier = "WITH QUERY EXPANSION" 5511 else: 5512 modifier = None 5513 5514 return self.expression( 5515 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5516 ) 5517 5518 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5519 def _parse_open_json(self) -> exp.OpenJSON: 5520 this = self._parse_bitwise() 5521 path = self._match(TokenType.COMMA) and self._parse_string() 5522 5523 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5524 this = self._parse_field(any_token=True) 5525 kind = self._parse_types() 5526 path = 
self._parse_string() 5527 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5528 5529 return self.expression( 5530 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5531 ) 5532 5533 expressions = None 5534 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5535 self._match_l_paren() 5536 expressions = self._parse_csv(_parse_open_json_column_def) 5537 5538 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5539 5540 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5541 args = self._parse_csv(self._parse_bitwise) 5542 5543 if self._match(TokenType.IN): 5544 return self.expression( 5545 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5546 ) 5547 5548 if haystack_first: 5549 haystack = seq_get(args, 0) 5550 needle = seq_get(args, 1) 5551 else: 5552 needle = seq_get(args, 0) 5553 haystack = seq_get(args, 1) 5554 5555 return self.expression( 5556 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5557 ) 5558 5559 def _parse_predict(self) -> exp.Predict: 5560 self._match_text_seq("MODEL") 5561 this = self._parse_table() 5562 5563 self._match(TokenType.COMMA) 5564 self._match_text_seq("TABLE") 5565 5566 return self.expression( 5567 exp.Predict, 5568 this=this, 5569 expression=self._parse_table(), 5570 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5571 ) 5572 5573 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5574 args = self._parse_csv(self._parse_table) 5575 return exp.JoinHint(this=func_name.upper(), expressions=args) 5576 5577 def _parse_substring(self) -> exp.Substring: 5578 # Postgres supports the form: substring(string [from int] [for int]) 5579 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5580 5581 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5582 5583 if self._match(TokenType.FROM): 5584 args.append(self._parse_bitwise()) 5585 if self._match(TokenType.FOR): 5586 if len(args) == 1: 5587 args.append(exp.Literal.number(1)) 5588 args.append(self._parse_bitwise()) 5589 5590 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5591 5592 def _parse_trim(self) -> exp.Trim: 5593 # https://www.w3resource.com/sql/character-functions/trim.php 5594 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5595 5596 position = None 5597 collation = None 5598 expression = None 5599 5600 if self._match_texts(self.TRIM_TYPES): 5601 position = self._prev.text.upper() 5602 5603 this = self._parse_bitwise() 5604 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5605 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5606 expression = self._parse_bitwise() 5607 5608 if invert_order: 5609 this, expression = expression, this 5610 5611 if self._match(TokenType.COLLATE): 5612 collation = self._parse_bitwise() 5613 5614 return self.expression( 5615 exp.Trim, this=this, position=position, expression=expression, collation=collation 5616 ) 5617 5618 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5619 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5620 5621 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5622 return self._parse_window(self._parse_id_var(), alias=True) 5623 5624 def _parse_respect_or_ignore_nulls( 5625 self, this: t.Optional[exp.Expression] 5626 ) -> t.Optional[exp.Expression]: 5627 if self._match_text_seq("IGNORE", "NULLS"): 
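            # e.g. LAST_VALUE(x) IGNORE NULLS wraps the parsed expression in exp.IgnoreNulls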
5628 return self.expression(exp.IgnoreNulls, this=this) 5629 if self._match_text_seq("RESPECT", "NULLS"): 5630 return self.expression(exp.RespectNulls, this=this) 5631 return this 5632 5633 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5634 if self._match(TokenType.HAVING): 5635 self._match_texts(("MAX", "MIN")) 5636 max = self._prev.text.upper() != "MIN" 5637 return self.expression( 5638 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5639 ) 5640 5641 return this 5642 5643 def _parse_window( 5644 self, this: t.Optional[exp.Expression], alias: bool = False 5645 ) -> t.Optional[exp.Expression]: 5646 func = this 5647 comments = func.comments if isinstance(func, exp.Expression) else None 5648 5649 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5650 self._match(TokenType.WHERE) 5651 this = self.expression( 5652 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5653 ) 5654 self._match_r_paren() 5655 5656 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5657 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5658 if self._match_text_seq("WITHIN", "GROUP"): 5659 order = self._parse_wrapped(self._parse_order) 5660 this = self.expression(exp.WithinGroup, this=this, expression=order) 5661 5662 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5663 # Some dialects choose to implement and some do not. 5664 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5665 5666 # There is some code above in _parse_lambda that handles 5667 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5668 5669 # The below changes handle 5670 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5671 5672 # Oracle allows both formats 5673 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5674 # and Snowflake chose to do the same for familiarity 5675 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5676 if isinstance(this, exp.AggFunc): 5677 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5678 5679 if ignore_respect and ignore_respect is not this: 5680 ignore_respect.replace(ignore_respect.this) 5681 this = self.expression(ignore_respect.__class__, this=this) 5682 5683 this = self._parse_respect_or_ignore_nulls(this) 5684 5685 # bigquery select from window x AS (partition by ...) 
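        # e.g. SELECT ... FROM t WINDOW w AS (PARTITION BY a ORDER BY b); with
        # alias=True we match "w AS (...)" and leave `over` unset instead of
        # requiring an OVER keyword.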
5686 if alias: 5687 over = None 5688 self._match(TokenType.ALIAS) 5689 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5690 return this 5691 else: 5692 over = self._prev.text.upper() 5693 5694 if comments and isinstance(func, exp.Expression): 5695 func.pop_comments() 5696 5697 if not self._match(TokenType.L_PAREN): 5698 return self.expression( 5699 exp.Window, 5700 comments=comments, 5701 this=this, 5702 alias=self._parse_id_var(False), 5703 over=over, 5704 ) 5705 5706 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5707 5708 first = self._match(TokenType.FIRST) 5709 if self._match_text_seq("LAST"): 5710 first = False 5711 5712 partition, order = self._parse_partition_and_order() 5713 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5714 5715 if kind: 5716 self._match(TokenType.BETWEEN) 5717 start = self._parse_window_spec() 5718 self._match(TokenType.AND) 5719 end = self._parse_window_spec() 5720 5721 spec = self.expression( 5722 exp.WindowSpec, 5723 kind=kind, 5724 start=start["value"], 5725 start_side=start["side"], 5726 end=end["value"], 5727 end_side=end["side"], 5728 ) 5729 else: 5730 spec = None 5731 5732 self._match_r_paren() 5733 5734 window = self.expression( 5735 exp.Window, 5736 comments=comments, 5737 this=this, 5738 partition_by=partition, 5739 order=order, 5740 spec=spec, 5741 alias=window_alias, 5742 over=over, 5743 first=first, 5744 ) 5745 5746 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5747 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5748 return self._parse_window(window, alias=alias) 5749 5750 return window 5751 5752 def _parse_partition_and_order( 5753 self, 5754 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5755 return self._parse_partition_by(), self._parse_order() 5756 5757 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5758 self._match(TokenType.BETWEEN) 5759 5760 return { 5761 "value": ( 5762 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5763 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5764 or self._parse_bitwise() 5765 ), 5766 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5767 } 5768 5769 def _parse_alias( 5770 self, this: t.Optional[exp.Expression], explicit: bool = False 5771 ) -> t.Optional[exp.Expression]: 5772 any_token = self._match(TokenType.ALIAS) 5773 comments = self._prev_comments or [] 5774 5775 if explicit and not any_token: 5776 return this 5777 5778 if self._match(TokenType.L_PAREN): 5779 aliases = self.expression( 5780 exp.Aliases, 5781 comments=comments, 5782 this=this, 5783 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5784 ) 5785 self._match_r_paren(aliases) 5786 return aliases 5787 5788 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 5789 self.STRING_ALIASES and self._parse_string_as_identifier() 5790 ) 5791 5792 if alias: 5793 comments.extend(alias.pop_comments()) 5794 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5795 column = this.this 5796 5797 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5798 if not this.comments and column and column.comments: 5799 this.comments = column.pop_comments() 5800 5801 return this 5802 5803 def _parse_id_var( 5804 self, 5805 any_token: bool = True, 5806 tokens: t.Optional[t.Collection[TokenType]] = None, 5807 ) -> t.Optional[exp.Expression]: 5808 expression = self._parse_identifier() 5809 if 
not expression and ( 5810 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 5811 ): 5812 quoted = self._prev.token_type == TokenType.STRING 5813 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 5814 5815 return expression 5816 5817 def _parse_string(self) -> t.Optional[exp.Expression]: 5818 if self._match_set(self.STRING_PARSERS): 5819 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5820 return self._parse_placeholder() 5821 5822 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5823 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5824 5825 def _parse_number(self) -> t.Optional[exp.Expression]: 5826 if self._match_set(self.NUMERIC_PARSERS): 5827 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5828 return self._parse_placeholder() 5829 5830 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5831 if self._match(TokenType.IDENTIFIER): 5832 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5833 return self._parse_placeholder() 5834 5835 def _parse_var( 5836 self, 5837 any_token: bool = False, 5838 tokens: t.Optional[t.Collection[TokenType]] = None, 5839 upper: bool = False, 5840 ) -> t.Optional[exp.Expression]: 5841 if ( 5842 (any_token and self._advance_any()) 5843 or self._match(TokenType.VAR) 5844 or (self._match_set(tokens) if tokens else False) 5845 ): 5846 return self.expression( 5847 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5848 ) 5849 return self._parse_placeholder() 5850 5851 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5852 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5853 self._advance() 5854 return self._prev 5855 return None 5856 5857 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5858 return self._parse_var() or self._parse_string() 5859 5860 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5861 return self._parse_primary() or self._parse_var(any_token=True) 5862 5863 def _parse_null(self) -> t.Optional[exp.Expression]: 5864 if self._match_set(self.NULL_TOKENS): 5865 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5866 return self._parse_placeholder() 5867 5868 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5869 if self._match(TokenType.TRUE): 5870 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5871 if self._match(TokenType.FALSE): 5872 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5873 return self._parse_placeholder() 5874 5875 def _parse_star(self) -> t.Optional[exp.Expression]: 5876 if self._match(TokenType.STAR): 5877 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5878 return self._parse_placeholder() 5879 5880 def _parse_parameter(self) -> exp.Parameter: 5881 this = self._parse_identifier() or self._parse_primary_or_var() 5882 return self.expression(exp.Parameter, this=this) 5883 5884 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5885 if self._match_set(self.PLACEHOLDER_PARSERS): 5886 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5887 if placeholder: 5888 return placeholder 5889 self._advance(-1) 5890 return None 5891 5892 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 5893 if not self._match_texts(keywords): 5894 return None 5895 if self._match(TokenType.L_PAREN, advance=False): 5896 return 
self._parse_wrapped_csv(self._parse_expression) 5897 5898 expression = self._parse_expression() 5899 return [expression] if expression else None 5900 5901 def _parse_csv( 5902 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5903 ) -> t.List[exp.Expression]: 5904 parse_result = parse_method() 5905 items = [parse_result] if parse_result is not None else [] 5906 5907 while self._match(sep): 5908 self._add_comments(parse_result) 5909 parse_result = parse_method() 5910 if parse_result is not None: 5911 items.append(parse_result) 5912 5913 return items 5914 5915 def _parse_tokens( 5916 self, parse_method: t.Callable, expressions: t.Dict 5917 ) -> t.Optional[exp.Expression]: 5918 this = parse_method() 5919 5920 while self._match_set(expressions): 5921 this = self.expression( 5922 expressions[self._prev.token_type], 5923 this=this, 5924 comments=self._prev_comments, 5925 expression=parse_method(), 5926 ) 5927 5928 return this 5929 5930 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5931 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5932 5933 def _parse_wrapped_csv( 5934 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5935 ) -> t.List[exp.Expression]: 5936 return self._parse_wrapped( 5937 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5938 ) 5939 5940 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5941 wrapped = self._match(TokenType.L_PAREN) 5942 if not wrapped and not optional: 5943 self.raise_error("Expecting (") 5944 parse_result = parse_method() 5945 if wrapped: 5946 self._match_r_paren() 5947 return parse_result 5948 5949 def _parse_expressions(self) -> t.List[exp.Expression]: 5950 return self._parse_csv(self._parse_expression) 5951 5952 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5953 return self._parse_select() or self._parse_set_operations( 5954 self._parse_expression() if alias else self._parse_conjunction() 5955 ) 5956 5957 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5958 return self._parse_query_modifiers( 5959 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5960 ) 5961 5962 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5963 this = None 5964 if self._match_texts(self.TRANSACTION_KIND): 5965 this = self._prev.text 5966 5967 self._match_texts(("TRANSACTION", "WORK")) 5968 5969 modes = [] 5970 while True: 5971 mode = [] 5972 while self._match(TokenType.VAR): 5973 mode.append(self._prev.text) 5974 5975 if mode: 5976 modes.append(" ".join(mode)) 5977 if not self._match(TokenType.COMMA): 5978 break 5979 5980 return self.expression(exp.Transaction, this=this, modes=modes) 5981 5982 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5983 chain = None 5984 savepoint = None 5985 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5986 5987 self._match_texts(("TRANSACTION", "WORK")) 5988 5989 if self._match_text_seq("TO"): 5990 self._match_text_seq("SAVEPOINT") 5991 savepoint = self._parse_id_var() 5992 5993 if self._match(TokenType.AND): 5994 chain = not self._match_text_seq("NO") 5995 self._match_text_seq("CHAIN") 5996 5997 if is_rollback: 5998 return self.expression(exp.Rollback, savepoint=savepoint) 5999 6000 return self.expression(exp.Commit, chain=chain) 6001 6002 def _parse_refresh(self) -> exp.Refresh: 6003 self._match(TokenType.TABLE) 6004 return self.expression(exp.Refresh, 
this=self._parse_string() or self._parse_table()) 6005 6006 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6007 if not self._match_text_seq("ADD"): 6008 return None 6009 6010 self._match(TokenType.COLUMN) 6011 exists_column = self._parse_exists(not_=True) 6012 expression = self._parse_field_def() 6013 6014 if expression: 6015 expression.set("exists", exists_column) 6016 6017 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6018 if self._match_texts(("FIRST", "AFTER")): 6019 position = self._prev.text 6020 column_position = self.expression( 6021 exp.ColumnPosition, this=self._parse_column(), position=position 6022 ) 6023 expression.set("position", column_position) 6024 6025 return expression 6026 6027 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6028 drop = self._match(TokenType.DROP) and self._parse_drop() 6029 if drop and not isinstance(drop, exp.Command): 6030 drop.set("kind", drop.args.get("kind", "COLUMN")) 6031 return drop 6032 6033 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6034 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6035 return self.expression( 6036 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6037 ) 6038 6039 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6040 index = self._index - 1 6041 6042 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6043 return self._parse_csv( 6044 lambda: self.expression( 6045 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6046 ) 6047 ) 6048 6049 self._retreat(index) 6050 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6051 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6052 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6053 6054 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6055 if self._match_texts(self.ALTER_ALTER_PARSERS): 6056 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6057 6058 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6059 # keyword after ALTER we default to parsing this statement 6060 self._match(TokenType.COLUMN) 6061 column = self._parse_field(any_token=True) 6062 6063 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6064 return self.expression(exp.AlterColumn, this=column, drop=True) 6065 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6066 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 6067 if self._match(TokenType.COMMENT): 6068 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6069 if self._match_text_seq("DROP", "NOT", "NULL"): 6070 return self.expression( 6071 exp.AlterColumn, 6072 this=column, 6073 drop=True, 6074 allow_null=True, 6075 ) 6076 if self._match_text_seq("SET", "NOT", "NULL"): 6077 return self.expression( 6078 exp.AlterColumn, 6079 this=column, 6080 allow_null=False, 6081 ) 6082 self._match_text_seq("SET", "DATA") 6083 self._match_text_seq("TYPE") 6084 return self.expression( 6085 exp.AlterColumn, 6086 this=column, 6087 dtype=self._parse_types(), 6088 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6089 using=self._match(TokenType.USING) and self._parse_conjunction(), 6090 ) 6091 6092 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6093 if self._match_texts(("ALL", "EVEN", "AUTO")): 6094 return 
self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6095 6096 self._match_text_seq("KEY", "DISTKEY") 6097 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6098 6099 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6100 if compound: 6101 self._match_text_seq("SORTKEY") 6102 6103 if self._match(TokenType.L_PAREN, advance=False): 6104 return self.expression( 6105 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6106 ) 6107 6108 self._match_texts(("AUTO", "NONE")) 6109 return self.expression( 6110 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6111 ) 6112 6113 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6114 index = self._index - 1 6115 6116 partition_exists = self._parse_exists() 6117 if self._match(TokenType.PARTITION, advance=False): 6118 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6119 6120 self._retreat(index) 6121 return self._parse_csv(self._parse_drop_column) 6122 6123 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 6124 if self._match(TokenType.COLUMN): 6125 exists = self._parse_exists() 6126 old_column = self._parse_column() 6127 to = self._match_text_seq("TO") 6128 new_column = self._parse_column() 6129 6130 if old_column is None or to is None or new_column is None: 6131 return None 6132 6133 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6134 6135 self._match_text_seq("TO") 6136 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 6137 6138 def _parse_alter_table_set(self) -> exp.AlterSet: 6139 alter_set = self.expression(exp.AlterSet) 6140 6141 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6142 "TABLE", "PROPERTIES" 6143 ): 6144 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_conjunction)) 6145 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6146 alter_set.set("expressions", [self._parse_conjunction()]) 6147 elif self._match_texts(("LOGGED", "UNLOGGED")): 6148 alter_set.set("option", exp.var(self._prev.text.upper())) 6149 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6150 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6151 elif self._match_text_seq("LOCATION"): 6152 alter_set.set("location", self._parse_field()) 6153 elif self._match_text_seq("ACCESS", "METHOD"): 6154 alter_set.set("access_method", self._parse_field()) 6155 elif self._match_text_seq("TABLESPACE"): 6156 alter_set.set("tablespace", self._parse_field()) 6157 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6158 alter_set.set("file_format", [self._parse_field()]) 6159 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6160 alter_set.set("file_format", self._parse_wrapped_options()) 6161 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6162 alter_set.set("copy_options", self._parse_wrapped_options()) 6163 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6164 alter_set.set("tag", self._parse_csv(self._parse_conjunction)) 6165 else: 6166 if self._match_text_seq("SERDE"): 6167 alter_set.set("serde", self._parse_field()) 6168 6169 alter_set.set("expressions", [self._parse_properties()]) 6170 6171 return alter_set 6172 6173 def _parse_alter(self) -> exp.AlterTable | exp.Command: 6174 start = self._prev 6175 6176 if not self._match(TokenType.TABLE): 6177 return 
self._parse_as_command(start) 6178 6179 exists = self._parse_exists() 6180 only = self._match_text_seq("ONLY") 6181 this = self._parse_table(schema=True) 6182 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6183 6184 if self._next: 6185 self._advance() 6186 6187 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6188 if parser: 6189 actions = ensure_list(parser(self)) 6190 options = self._parse_csv(self._parse_property) 6191 6192 if not self._curr and actions: 6193 return self.expression( 6194 exp.AlterTable, 6195 this=this, 6196 exists=exists, 6197 actions=actions, 6198 only=only, 6199 options=options, 6200 cluster=cluster, 6201 ) 6202 6203 return self._parse_as_command(start) 6204 6205 def _parse_merge(self) -> exp.Merge: 6206 self._match(TokenType.INTO) 6207 target = self._parse_table() 6208 6209 if target and self._match(TokenType.ALIAS, advance=False): 6210 target.set("alias", self._parse_table_alias()) 6211 6212 self._match(TokenType.USING) 6213 using = self._parse_table() 6214 6215 self._match(TokenType.ON) 6216 on = self._parse_conjunction() 6217 6218 return self.expression( 6219 exp.Merge, 6220 this=target, 6221 using=using, 6222 on=on, 6223 expressions=self._parse_when_matched(), 6224 ) 6225 6226 def _parse_when_matched(self) -> t.List[exp.When]: 6227 whens = [] 6228 6229 while self._match(TokenType.WHEN): 6230 matched = not self._match(TokenType.NOT) 6231 self._match_text_seq("MATCHED") 6232 source = ( 6233 False 6234 if self._match_text_seq("BY", "TARGET") 6235 else self._match_text_seq("BY", "SOURCE") 6236 ) 6237 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 6238 6239 self._match(TokenType.THEN) 6240 6241 if self._match(TokenType.INSERT): 6242 _this = self._parse_star() 6243 if _this: 6244 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 6245 else: 6246 then = self.expression( 6247 exp.Insert, 6248 this=self._parse_value(), 6249 expression=self._match_text_seq("VALUES") and self._parse_value(), 6250 ) 6251 elif self._match(TokenType.UPDATE): 6252 expressions = self._parse_star() 6253 if expressions: 6254 then = self.expression(exp.Update, expressions=expressions) 6255 else: 6256 then = self.expression( 6257 exp.Update, 6258 expressions=self._match(TokenType.SET) 6259 and self._parse_csv(self._parse_equality), 6260 ) 6261 elif self._match(TokenType.DELETE): 6262 then = self.expression(exp.Var, this=self._prev.text) 6263 else: 6264 then = None 6265 6266 whens.append( 6267 self.expression( 6268 exp.When, 6269 matched=matched, 6270 source=source, 6271 condition=condition, 6272 then=then, 6273 ) 6274 ) 6275 return whens 6276 6277 def _parse_show(self) -> t.Optional[exp.Expression]: 6278 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6279 if parser: 6280 return parser(self) 6281 return self._parse_as_command(self._prev) 6282 6283 def _parse_set_item_assignment( 6284 self, kind: t.Optional[str] = None 6285 ) -> t.Optional[exp.Expression]: 6286 index = self._index 6287 6288 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6289 return self._parse_set_transaction(global_=kind == "GLOBAL") 6290 6291 left = self._parse_primary() or self._parse_column() 6292 assignment_delimiter = self._match_texts(("=", "TO")) 6293 6294 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6295 self._retreat(index) 6296 return None 6297 6298 right = self._parse_statement() or self._parse_id_var() 6299 if isinstance(right, 
(exp.Column, exp.Identifier)): 6300 right = exp.var(right.name) 6301 6302 this = self.expression(exp.EQ, this=left, expression=right) 6303 return self.expression(exp.SetItem, this=this, kind=kind) 6304 6305 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6306 self._match_text_seq("TRANSACTION") 6307 characteristics = self._parse_csv( 6308 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6309 ) 6310 return self.expression( 6311 exp.SetItem, 6312 expressions=characteristics, 6313 kind="TRANSACTION", 6314 **{"global": global_}, # type: ignore 6315 ) 6316 6317 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6318 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6319 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6320 6321 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6322 index = self._index 6323 set_ = self.expression( 6324 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6325 ) 6326 6327 if self._curr: 6328 self._retreat(index) 6329 return self._parse_as_command(self._prev) 6330 6331 return set_ 6332 6333 def _parse_var_from_options( 6334 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6335 ) -> t.Optional[exp.Var]: 6336 start = self._curr 6337 if not start: 6338 return None 6339 6340 option = start.text.upper() 6341 continuations = options.get(option) 6342 6343 index = self._index 6344 self._advance() 6345 for keywords in continuations or []: 6346 if isinstance(keywords, str): 6347 keywords = (keywords,) 6348 6349 if self._match_text_seq(*keywords): 6350 option = f"{option} {' '.join(keywords)}" 6351 break 6352 else: 6353 if continuations or continuations is None: 6354 if raise_unmatched: 6355 self.raise_error(f"Unknown option {option}") 6356 6357 self._retreat(index) 6358 return None 6359 6360 return exp.var(option) 6361 6362 def _parse_as_command(self, start: Token) -> exp.Command: 6363 while self._curr: 6364 self._advance() 6365 text = self._find_sql(start, self._prev) 6366 size = len(start.text) 6367 self._warn_unsupported() 6368 return exp.Command(this=text[:size], expression=text[size:]) 6369 6370 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6371 settings = [] 6372 6373 self._match_l_paren() 6374 kind = self._parse_id_var() 6375 6376 if self._match(TokenType.L_PAREN): 6377 while True: 6378 key = self._parse_id_var() 6379 value = self._parse_primary() 6380 6381 if not key and value is None: 6382 break 6383 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6384 self._match(TokenType.R_PAREN) 6385 6386 self._match_r_paren() 6387 6388 return self.expression( 6389 exp.DictProperty, 6390 this=this, 6391 kind=kind.this if kind else None, 6392 settings=settings, 6393 ) 6394 6395 def _parse_dict_range(self, this: str) -> exp.DictRange: 6396 self._match_l_paren() 6397 has_min = self._match_text_seq("MIN") 6398 if has_min: 6399 min = self._parse_var() or self._parse_primary() 6400 self._match_text_seq("MAX") 6401 max = self._parse_var() or self._parse_primary() 6402 else: 6403 max = self._parse_var() or self._parse_primary() 6404 min = exp.Literal.number(0) 6405 self._match_r_paren() 6406 return self.expression(exp.DictRange, this=this, min=min, max=max) 6407 6408 def _parse_comprehension( 6409 self, this: t.Optional[exp.Expression] 6410 ) -> t.Optional[exp.Comprehension]: 6411 index = self._index 6412 expression = self._parse_column() 6413 if not 
self._match(TokenType.IN): 6414 self._retreat(index - 1) 6415 return None 6416 iterator = self._parse_column() 6417 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 6418 return self.expression( 6419 exp.Comprehension, 6420 this=this, 6421 expression=expression, 6422 iterator=iterator, 6423 condition=condition, 6424 ) 6425 6426 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6427 if self._match(TokenType.HEREDOC_STRING): 6428 return self.expression(exp.Heredoc, this=self._prev.text) 6429 6430 if not self._match_text_seq("$"): 6431 return None 6432 6433 tags = ["$"] 6434 tag_text = None 6435 6436 if self._is_connected(): 6437 self._advance() 6438 tags.append(self._prev.text.upper()) 6439 else: 6440 self.raise_error("No closing $ found") 6441 6442 if tags[-1] != "$": 6443 if self._is_connected() and self._match_text_seq("$"): 6444 tag_text = tags[-1] 6445 tags.append("$") 6446 else: 6447 self.raise_error("No closing $ found") 6448 6449 heredoc_start = self._curr 6450 6451 while self._curr: 6452 if self._match_text_seq(*tags, advance=False): 6453 this = self._find_sql(heredoc_start, self._prev) 6454 self._advance(len(tags)) 6455 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6456 6457 self._advance() 6458 6459 self.raise_error(f"No closing {''.join(tags)} found") 6460 return None 6461 6462 def _find_parser( 6463 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6464 ) -> t.Optional[t.Callable]: 6465 if not self._curr: 6466 return None 6467 6468 index = self._index 6469 this = [] 6470 while True: 6471 # The current token might be multiple words 6472 curr = self._curr.text.upper() 6473 key = curr.split(" ") 6474 this.append(curr) 6475 6476 self._advance() 6477 result, trie = in_trie(trie, key) 6478 if result == TrieResult.FAILED: 6479 break 6480 6481 if result == TrieResult.EXISTS: 6482 subparser = parsers[" ".join(this)] 6483 return subparser 6484 6485 self._retreat(index) 6486 return None 6487 6488 def _match(self, token_type, advance=True, expression=None): 6489 if not self._curr: 6490 return None 6491 6492 if self._curr.token_type == token_type: 6493 if advance: 6494 self._advance() 6495 self._add_comments(expression) 6496 return True 6497 6498 return None 6499 6500 def _match_set(self, types, advance=True): 6501 if not self._curr: 6502 return None 6503 6504 if self._curr.token_type in types: 6505 if advance: 6506 self._advance() 6507 return True 6508 6509 return None 6510 6511 def _match_pair(self, token_type_a, token_type_b, advance=True): 6512 if not self._curr or not self._next: 6513 return None 6514 6515 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6516 if advance: 6517 self._advance(2) 6518 return True 6519 6520 return None 6521 6522 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6523 if not self._match(TokenType.L_PAREN, expression=expression): 6524 self.raise_error("Expecting (") 6525 6526 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6527 if not self._match(TokenType.R_PAREN, expression=expression): 6528 self.raise_error("Expecting )") 6529 6530 def _match_texts(self, texts, advance=True): 6531 if self._curr and self._curr.text.upper() in texts: 6532 if advance: 6533 self._advance() 6534 return True 6535 return None 6536 6537 def _match_text_seq(self, *texts, advance=True): 6538 index = self._index 6539 for text in texts: 6540 if self._curr and self._curr.text.upper() == text: 6541 self._advance() 6542 else: 6543 
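                # Restore the cursor on a partial match, so callers can probe
                # multi-word keyword sequences without consuming any tokens.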
self._retreat(index) 6544 return None 6545 6546 if not advance: 6547 self._retreat(index) 6548 6549 return True 6550 6551 def _replace_lambda( 6552 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 6553 ) -> t.Optional[exp.Expression]: 6554 if not node: 6555 return node 6556 6557 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 6558 6559 for column in node.find_all(exp.Column): 6560 typ = lambda_types.get(column.parts[0].name) 6561 if typ is not None: 6562 dot_or_id = column.to_dot() if column.table else column.this 6563 6564 if typ: 6565 dot_or_id = self.expression( 6566 exp.Cast, 6567 this=dot_or_id, 6568 to=typ, 6569 ) 6570 6571 parent = column.parent 6572 6573 while isinstance(parent, exp.Dot): 6574 if not isinstance(parent.parent, exp.Dot): 6575 parent.replace(dot_or_id) 6576 break 6577 parent = parent.parent 6578 else: 6579 if column is node: 6580 node = dot_or_id 6581 else: 6582 column.replace(dot_or_id) 6583 return node 6584 6585 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6586 start = self._prev 6587 6588 # Not to be confused with TRUNCATE(number, decimals) function call 6589 if self._match(TokenType.L_PAREN): 6590 self._retreat(self._index - 2) 6591 return self._parse_function() 6592 6593 # Clickhouse supports TRUNCATE DATABASE as well 6594 is_database = self._match(TokenType.DATABASE) 6595 6596 self._match(TokenType.TABLE) 6597 6598 exists = self._parse_exists(not_=False) 6599 6600 expressions = self._parse_csv( 6601 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6602 ) 6603 6604 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6605 6606 if self._match_text_seq("RESTART", "IDENTITY"): 6607 identity = "RESTART" 6608 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6609 identity = "CONTINUE" 6610 else: 6611 identity = None 6612 6613 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6614 option = self._prev.text 6615 else: 6616 option = None 6617 6618 partition = self._parse_partition() 6619 6620 # Fallback case 6621 if self._curr: 6622 return self._parse_as_command(start) 6623 6624 return self.expression( 6625 exp.TruncateTable, 6626 expressions=expressions, 6627 is_database=is_database, 6628 exists=exists, 6629 cluster=cluster, 6630 identity=identity, 6631 option=option, 6632 partition=partition, 6633 ) 6634 6635 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6636 this = self._parse_ordered(self._parse_opclass) 6637 6638 if not self._match(TokenType.WITH): 6639 return this 6640 6641 op = self._parse_var(any_token=True) 6642 6643 return self.expression(exp.WithOperator, this=this, op=op) 6644 6645 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 6646 opts = [] 6647 self._match(TokenType.EQ) 6648 self._match(TokenType.L_PAREN) 6649 while self._curr and not self._match(TokenType.R_PAREN): 6650 opts.append(self._parse_conjunction()) 6651 self._match(TokenType.COMMA) 6652 return opts 6653 6654 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 6655 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 6656 6657 options = [] 6658 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 6659 option = self._parse_unquoted_field() 6660 value = None 6661 6662 # Some options are defined as functions with the values as params 6663 if not isinstance(option, exp.Func): 6664 prev = self._prev.text.upper() 6665 # Different dialects might separate options and 
values by white space, "=" and "AS" 6666 self._match(TokenType.EQ) 6667 self._match(TokenType.ALIAS) 6668 6669 if prev == "FILE_FORMAT" and self._match(TokenType.L_PAREN): 6670 # Snowflake FILE_FORMAT case 6671 value = self._parse_wrapped_options() 6672 else: 6673 value = self._parse_unquoted_field() 6674 6675 param = self.expression(exp.CopyParameter, this=option, expression=value) 6676 options.append(param) 6677 6678 if sep: 6679 self._match(sep) 6680 6681 return options 6682 6683 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 6684 expr = self.expression(exp.Credentials) 6685 6686 if self._match_text_seq("STORAGE_INTEGRATION", advance=False): 6687 expr.set("storage", self._parse_conjunction()) 6688 if self._match_text_seq("CREDENTIALS"): 6689 # Snowflake supports CREDENTIALS = (...), while Redshift CREDENTIALS <string> 6690 creds = ( 6691 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 6692 ) 6693 expr.set("credentials", creds) 6694 if self._match_text_seq("ENCRYPTION"): 6695 expr.set("encryption", self._parse_wrapped_options()) 6696 if self._match_text_seq("IAM_ROLE"): 6697 expr.set("iam_role", self._parse_field()) 6698 if self._match_text_seq("REGION"): 6699 expr.set("region", self._parse_field()) 6700 6701 return expr 6702 6703 def _parse_file_location(self) -> t.Optional[exp.Expression]: 6704 return self._parse_field() 6705 6706 def _parse_copy(self) -> exp.Copy | exp.Command: 6707 start = self._prev 6708 6709 self._match(TokenType.INTO) 6710 6711 this = ( 6712 self._parse_conjunction() 6713 if self._match(TokenType.L_PAREN, advance=False) 6714 else self._parse_table(schema=True) 6715 ) 6716 6717 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 6718 6719 files = self._parse_csv(self._parse_file_location) 6720 credentials = self._parse_credentials() 6721 6722 self._match_text_seq("WITH") 6723 6724 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 6725 6726 # Fallback case 6727 if self._curr: 6728 return self._parse_as_command(start) 6729 6730 return self.expression( 6731 exp.Copy, 6732 this=this, 6733 kind=kind, 6734 credentials=credentials, 6735 files=files, 6736 params=params, 6737 )
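
As a brief usage sketch (not part of the module source; it assumes only the
public API shown above): the Parser is usually driven through sqlglot.parse_one,
but it can also consume a token stream directly:

    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    # Tokenize a statement, then parse it into one expression tree per statement.
    tokens = Tokenizer().tokenize("SELECT CAST(x AS INT) FROM t")
    expression = Parser().parse(tokens)[0]
    print(repr(expression))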
122class Parser(metaclass=_Parser): 123 """ 124 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 125 126 Args: 127 error_level: The desired error level. 128 Default: ErrorLevel.IMMEDIATE 129 error_message_context: The amount of context to capture from a query string when displaying 130 the error message (in number of characters). 131 Default: 100 132 max_errors: Maximum number of error messages to include in a raised ParseError. 133 This is only relevant if error_level is ErrorLevel.RAISE. 134 Default: 3 135 """ 136 137 FUNCTIONS: t.Dict[str, t.Callable] = { 138 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 139 "CONCAT": lambda args, dialect: exp.Concat( 140 expressions=args, 141 safe=not dialect.STRICT_STRING_CONCAT, 142 coalesce=dialect.CONCAT_COALESCE, 143 ), 144 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 145 expressions=args, 146 safe=not dialect.STRICT_STRING_CONCAT, 147 coalesce=dialect.CONCAT_COALESCE, 148 ), 149 "DATE_TO_DATE_STR": lambda args: exp.Cast( 150 this=seq_get(args, 0), 151 to=exp.DataType(this=exp.DataType.Type.TEXT), 152 ), 153 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 154 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 155 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 156 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 157 "LIKE": build_like, 158 "LOG": build_logarithm, 159 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 160 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 161 "MOD": build_mod, 162 "TIME_TO_TIME_STR": lambda args: exp.Cast( 163 this=seq_get(args, 0), 164 to=exp.DataType(this=exp.DataType.Type.TEXT), 165 ), 166 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 167 this=exp.Cast( 168 this=seq_get(args, 0), 169 to=exp.DataType(this=exp.DataType.Type.TEXT), 170 ), 171 start=exp.Literal.number(1), 172 length=exp.Literal.number(10), 173 ), 174 "VAR_MAP": build_var_map, 175 "LOWER": build_lower, 176 "UPPER": build_upper, 177 "HEX": build_hex, 178 "TO_HEX": build_hex, 179 } 180 181 NO_PAREN_FUNCTIONS = { 182 TokenType.CURRENT_DATE: exp.CurrentDate, 183 TokenType.CURRENT_DATETIME: exp.CurrentDate, 184 TokenType.CURRENT_TIME: exp.CurrentTime, 185 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 186 TokenType.CURRENT_USER: exp.CurrentUser, 187 } 188 189 STRUCT_TYPE_TOKENS = { 190 TokenType.NESTED, 191 TokenType.OBJECT, 192 TokenType.STRUCT, 193 } 194 195 NESTED_TYPE_TOKENS = { 196 TokenType.ARRAY, 197 TokenType.LOWCARDINALITY, 198 TokenType.MAP, 199 TokenType.NULLABLE, 200 *STRUCT_TYPE_TOKENS, 201 } 202 203 ENUM_TYPE_TOKENS = { 204 TokenType.ENUM, 205 TokenType.ENUM8, 206 TokenType.ENUM16, 207 } 208 209 AGGREGATE_TYPE_TOKENS = { 210 TokenType.AGGREGATEFUNCTION, 211 TokenType.SIMPLEAGGREGATEFUNCTION, 212 } 213 214 TYPE_TOKENS = { 215 TokenType.BIT, 216 TokenType.BOOLEAN, 217 TokenType.TINYINT, 218 TokenType.UTINYINT, 219 TokenType.SMALLINT, 220 TokenType.USMALLINT, 221 TokenType.INT, 222 TokenType.UINT, 223 TokenType.BIGINT, 224 TokenType.UBIGINT, 225 TokenType.INT128, 226 TokenType.UINT128, 227 TokenType.INT256, 228 TokenType.UINT256, 229 TokenType.MEDIUMINT, 230 TokenType.UMEDIUMINT, 231 TokenType.FIXEDSTRING, 232 TokenType.FLOAT, 233 TokenType.DOUBLE, 234 TokenType.CHAR, 235 TokenType.NCHAR, 236 TokenType.VARCHAR, 237 TokenType.NVARCHAR, 238 TokenType.BPCHAR, 239 TokenType.TEXT, 240 

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.MODEL,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
        TokenType.STREAMLIT,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
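
    # A minimal sketch (not from the upstream source, assuming the default
    # dialect): since these keyword tokens are in ID_VAR_TOKENS, they still work
    # as plain identifiers.
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT first, next FROM commit").sql()
    #     'SELECT first, next FROM commit'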

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}
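
    # A minimal sketch (not from the upstream source, assuming the default
    # dialect): the CONJUNCTION, EQUALITY, COMPARISON, BITWISE, TERM and FACTOR
    # maps define operator precedence, so FACTOR-level operators bind tighter
    # than TERM-level ones.
    #
    #     >>> import sqlglot
    #     >>> tree = sqlglot.parse_one("1 + 2 * 3")
    #     >>> type(tree).__name__, type(tree.expression).__name__
    #     ('Add', 'Mul')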

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon),
    }
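
    # A minimal sketch (not from the upstream source, assuming the default
    # dialect): _parse_statement dispatches on the leading token through
    # STATEMENT_PARSERS.
    #
    #     >>> import sqlglot
    #     >>> type(sqlglot.parse_one("DROP TABLE t")).__name__
    #     'Drop'
    #     >>> type(sqlglot.parse_one("UPDATE t SET x = 1")).__name__
    #     'Update'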

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }
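
    # A minimal sketch (not from the upstream source, assuming the default
    # dialect): STRING_PARSERS and NUMERIC_PARSERS map literal tokens to leaf
    # expressions, tagging whether the literal is a string.
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("'abc'").is_string
    #     True
    #     >>> sqlglot.parse_one("42").is_string
    #     False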

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        ),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
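
    # A minimal sketch (not from the upstream source, assuming the default
    # dialect): RANGE_PARSERS handles postfix predicates once the left operand
    # has been parsed.
    #
    #     >>> import sqlglot
    #     >>> type(sqlglot.parse_one("x BETWEEN 1 AND 2")).__name__
    #     'Between'
    #
    # A dialect can hook in extra operators the same way; e.g. a hypothetical
    # subclass could register one with binary_range_parser:
    #
    #     class MyParser(Parser):
    #         RANGE_PARSERS = {
    #             **Parser.RANGE_PARSERS,
    #             TokenType.XOR: binary_range_parser(exp.Xor),
    #         }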

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
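
    # A minimal sketch (not from the upstream source, assuming the mysql
    # dialect): property keywords in DDL are looked up in PROPERTY_PARSERS, so
    # ENGINE=... becomes an exp.EngineProperty on the Create node.
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> create = sqlglot.parse_one("CREATE TABLE t (x INT) ENGINE=InnoDB", read="mysql")
    #     >>> create.find(exp.EngineProperty) is not None
    #     True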

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_conjunction),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }
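
    # A minimal sketch (not from the upstream source, assuming the default
    # dialect): inline column constraints are parsed through CONSTRAINT_PARSERS
    # into *ColumnConstraint nodes.
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> ddl = sqlglot.parse_one("CREATE TABLE t (id INT NOT NULL DEFAULT 0)")
    #     >>> [type(c.kind).__name__ for c in ddl.find_all(exp.ColumnConstraint)]
    #     ['NotNullColumnConstraint', 'DefaultColumnConstraint']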

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }
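
    # A minimal sketch (not from the upstream source, assuming the default
    # dialect): each QUERY_MODIFIER_PARSERS entry returns an
    # (arg_name, expression) pair that is attached to the query node under that
    # arg name.
    #
    #     >>> import sqlglot
    #     >>> q = sqlglot.parse_one("SELECT a FROM t WHERE a > 0 ORDER BY a LIMIT 5")
    #     >>> [k for k in ("where", "order", "limit") if q.args.get(k)]
    #     ['where', 'order', 'limit']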

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTER: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple())

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}
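
    # A minimal sketch (not from the upstream source, assuming the default
    # dialect): OPTIONS_TYPE tables such as TRANSACTION_CHARACTERISTICS above
    # enumerate the keyword sequences accepted by _parse_var_from_options.
    #
    #     >>> import sqlglot
    #     >>> type(sqlglot.parse_one("SET TRANSACTION ISOLATION LEVEL READ COMMITTED")).__name__
    #     'Set'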

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. SELECT COUNT(*) 'count'
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_UNION = True
    UNION_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a JSON document
    COLON_IS_JSON_EXTRACT = False

    # Whether a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False
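
    # A hypothetical sketch (not from the upstream source): dialects tune
    # parsing behavior by overriding these class-level flags in their Parser
    # subclass rather than at runtime.
    #
    #     class MyParser(Parser):
    #         STRING_ALIASES = True      # accept SELECT COUNT(*) 'count'
    #         LOG_DEFAULTS_TO_LN = True  # one-argument LOG() parses as LN()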

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
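
    # A minimal usage sketch (not from the upstream source, assuming the default
    # dialect): the parser consumes the Tokenizer's output and yields one tree
    # per statement.
    #
    #     >>> from sqlglot.parser import Parser
    #     >>> from sqlglot.tokens import Tokenizer
    #     >>> trees = Parser().parse(Tokenizer().tokenize("SELECT 1; SELECT 2"))
    #     >>> len(trees)
    #     2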

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if token.comments:
                    chunks.append([token])

                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
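
    # A minimal sketch (not from the upstream source): error handling depends on
    # error_level. The default, ErrorLevel.IMMEDIATE, raises from raise_error as
    # soon as a problem is hit, while ErrorLevel.RAISE collects errors and raises
    # them together from check_errors; each recorded error is a dict carrying
    # description, line, col and surrounding context.
    #
    #     >>> import sqlglot
    #     >>> from sqlglot.errors import ParseError
    #     >>> try:
    #     ...     sqlglot.parse_one("SELECT 1 +")
    #     ... except ParseError as e:
    #     ...     details = e.errors  # [{'description': ..., 'line': ..., 'col': ..., ...}]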

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command, this=self._prev.text.upper(), expression=self._parse_string()
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an
        error. This behavior can be different depending on the user-set ErrorLevel, so _try_parse
        aims to solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
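
    # A minimal sketch (not from the upstream source, assuming the default
    # dialect): _parse_comment above implements COMMENT ON <kind> ... IS '<string>'.
    #
    #     >>> import sqlglot
    #     >>> type(sqlglot.parse_one("COMMENT ON TABLE t IS 'users'")).__name__
    #     'Comment'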

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=kind.upper(),
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
        )
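
    # A minimal sketch (not from the upstream source, assuming the default
    # dialect): the trailing _match_text_seq calls above make DROP flags plain
    # keyword matches.
    #
    #     >>> import sqlglot
    #     >>> drop = sqlglot.parse_one("DROP TABLE IF EXISTS s.t CASCADE")
    #     >>> drop.args["exists"], drop.args["cascade"]
    #     (True, True)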

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )

        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_statement()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

            shallow = self._match_text_seq("SHALLOW")

            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self.expression(
                    exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
                )

        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            return self._parse_as_command(start)

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text.upper(),
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            self._match(TokenType.COMMA)
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        return None if self._index == index else seq
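
    # A minimal sketch (not from the upstream source, assuming the default
    # dialect): CREATE SEQUENCE goes through _parse_create with kind SEQUENCE,
    # and the loop above collects its options into exp.SequenceProperties.
    #
    #     >>> import sqlglot
    #     >>> create = sqlglot.parse_one("CREATE SEQUENCE s START WITH 1 INCREMENT BY 2")
    #     >>> create.args["kind"]
    #     'SEQUENCE'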

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_property)

    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return self._parse_sequence_properties()

        return self.expression(
            exp.Property,
            this=key.to_dot() if isinstance(key, exp.Column) else key,
            value=self._parse_bitwise() or self._parse_var(any_token=True),
        )

    def _parse_stored(self) -> exp.FileFormatProperty:
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat, input_format=input_format, output_format=output_format
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
        )

    def _parse_unquoted_field(self):
        field = self._parse_field()
        if isinstance(field, exp.Identifier) and not field.quoted:
            field = exp.var(field)

        return field

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)

        return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()
            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_retention_period(self) -> exp.Var:
        # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        unit = self._parse_var(any_token=True)
        return exp.var(f"{number_str}{unit}")

    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        self._match(TokenType.EQ)
        prop = self.expression(
            exp.WithSystemVersioningProperty,
            **{  # type: ignore
                "on": True,
                "with": with_,
            },
        )

        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
        self._match(TokenType.EQ)
        on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
        prop = self.expression(exp.DataDeletionProperty, on=on)

        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILTER_COLUMN", "="):
                    prop.set("filter_column", self._parse_column())
                elif self._match_text_seq("RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop
    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )
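    # Illustration (hypothetical example, not in the original module): the Hive
    # bucketing clause handled by _parse_clustered_by above, e.g.
    #
    #   CLUSTERED BY (user_id) SORTED BY (ts DESC) INTO 32 BUCKETS
    #
    # should produce an exp.ClusteredByProperty with `expressions=[user_id]`,
    # `sorted_by=[ts DESC]` and `buckets=32`; SORTED BY is optional, in which case
    # `sorted_by` stays None.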
    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []
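    # Sketch (illustrative, not part of the original source): _parse_locking
    # recognizes Teradata-style clauses such as
    #
    #   LOCKING TABLE t FOR ACCESS  ->  kind="TABLE", for_or_in="FOR", lock_type="ACCESS"
    #   LOCKING ROW FOR WRITE       ->  kind="ROW",   for_or_in="FOR", lock_type="WRITE"
    #
    # Each keyword group is matched independently, so partial clauses degrade to
    # None-valued fields instead of raising.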
    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")

            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))
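    # Example (hedged sketch, not in the original module): the PostgreSQL
    # partition-child syntax handled by _parse_partitioned_of and
    # _parse_partition_bound_spec above:
    #
    #   >>> import sqlglot
    #   >>> ddl = """CREATE TABLE m2024 PARTITION OF measurements
    #   ...     FOR VALUES FROM ('2024-01-01') TO (MAXVALUE)"""
    #   >>> sqlglot.parse_one(ddl, read="postgres")
    #
    # MINVALUE/MAXVALUE are captured as exp.var sentinels rather than as columns,
    # so they round-trip without being treated as identifiers.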
    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        null = None
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
            value = None
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            style = None
            self._retreat(self._index - 2)
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(
            exp.Describe, this=this, style=style, kind=kind, expressions=expressions
        )
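    # Illustration (assumed behavior, not part of the original source): _parse_describe
    # accepts an optional style keyword before the table name, e.g.
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("DESCRIBE EXTENDED db.tbl", read="spark")
    #
    # should yield an exp.Describe with style="EXTENDED". The DOT lookahead exists so
    # that a qualified name like EXTENDED.tbl is re-read as a table rather than as a
    # style keyword, which is why the parser retreats two tokens in that case.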
    def _parse_insert(self) -> exp.Insert:
        comments = ensure_list(self._prev_comments)
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
        index = self._index
        with_ = with_ or self._match_text_seq("WITH")

        if not self._match(TokenType.SERDE_PROPERTIES):
            self._retreat(index)
            return None
        return self.expression(
            exp.SerdeProperties,
            **{  # type: ignore
                "expressions": self._parse_wrapped_properties(),
                "with": with_,
            },
        )
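    # Usage sketch (not part of the original module): _parse_on_conflict covers both
    # PostgreSQL- and MySQL-style upserts, e.g.
    #
    #   INSERT ... ON CONFLICT (id) DO UPDATE SET x = 1   -- conflict_keys=[id]
    #   INSERT ... ON DUPLICATE KEY UPDATE x = 1          -- duplicate=True
    #
    # CONFLICT_ACTIONS supplies the action keyword, and when the action ends in
    # UPDATE the SET assignments are parsed as a CSV of equality expressions.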
    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = self._parse_serde_properties()

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )
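    # Example (hedged, for illustration only): the multiple-table DELETE form
    # referenced above parses a leading table list before FROM, e.g.
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one(
    #   ...     "DELETE t1 FROM t1 JOIN t2 ON t1.id = t2.id WHERE t2.x > 0",
    #   ...     read="mysql",
    #   ... )
    #
    # Here `tables=[t1]` names what gets deleted while `this` holds the joined FROM
    # source; a plain `DELETE FROM t WHERE ...` leaves `tables` as None.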
    def _parse_cache(self) -> exp.Cache:
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> t.Optional[exp.Tuple]:
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        expression = self._parse_expression()
        if expression:
            return self.expression(exp.Tuple, expressions=[expression])
        return None

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()
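    # Illustration (assumption-labeled, not original source): _parse_value is what
    # lets both the standard and the shorthand VALUES bodies work:
    #
    #   VALUES (1, 2), (3, 4)   -- two 2-column rows (parenthesized tuples)
    #   VALUES 1, 2             -- two 1-column rows in permissive dialects
    #
    # Each row becomes an exp.Tuple either way, so downstream code can treat the two
    # shapes uniformly.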
    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )
                this = self._parse_query_modifiers(self._parse_set_operations(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        if parse_set_operation:
            return self._parse_set_operations(this)
        return this

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )
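    # Usage sketch (not in the original module): _parse_with/_parse_cte attach a WITH
    # clause to whatever statement follows it, e.g.
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("WITH x AS (SELECT 1 AS a) SELECT a FROM x")
    #
    # The CTE list is parsed first, then the trailing statement, and the exp.With
    # node is stored under the statement's "with" arg; a statement whose arg_types
    # lack "with" is reported via raise_error instead of silently dropping the CTE.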
    def _implicit_unnests_to_explicit(self, this: E) -> E:
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, (exp.Query, exp.Table)):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args:
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_function() or self._parse_var(upper=True)
                ),
                [],
            ):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )
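    # Sketch (illustrative only): _parse_join_parts returns the (method, side, kind)
    # token triple, so a clause like
    #
    #   NATURAL LEFT OUTER JOIN t  ->  method=NATURAL, side=LEFT, kind=OUTER
    #
    # while a bare comma in a FROM list is normalized by _parse_join below into a
    # plain exp.Join with no kind, matching cross-join semantics.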
    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not isinstance(kwargs["this"], exp.Unnest) and not (
            kind and kind.token_type == TokenType.CROSS
        ):
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        this = self._parse_conjunction()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
        )
    def _parse_index(
        self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
    ) -> t.Optional[exp.Index]:
        if index or anonymous:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts(("INDEX", "KEY"))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        return self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
            pivots=self._parse_pivots(),
        )
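    # Example (hedged, not original source): _parse_table_parts walks DOT chains
    # from left to right, shifting parts into db and catalog slots:
    #
    #   catalog.db.tbl   ->  exp.Table(this=tbl, db=db, catalog=catalog)
    #   a..b (T-SQL)     ->  empty-string db part, as the comment above notes
    #
    # A fourth part and beyond is folded into a nested exp.Dot on the table side,
    # keeping arbitrarily deep qualification representable.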
    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match_text_seq("VALUES"):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )
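    # Illustration (assumed, not part of the original module): _parse_unnest
    # normalizes alias handling across dialects, e.g.
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one(
    #   ...     "SELECT * FROM UNNEST([1, 2]) AS x WITH OFFSET AS pos",
    #   ...     read="bigquery",
    #   ... )
    #
    # In UNNEST_COLUMN_ONLY dialects such as BigQuery, the alias names the produced
    # column rather than the table, which is why the parser moves `this` into
    # `columns` before attaching the offset identifier.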
    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        if not method and self.DEFAULT_SAMPLING_METHOD:
            method = exp.var(self.DEFAULT_SAMPLING_METHOD)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        return iter(self._parse_join, None)

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot_in(self) -> exp.In:
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_conjunction()

            self._match(TokenType.ALIAS)
            alias = self._parse_field()
            if alias:
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        aliased_expressions = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=aliased_expressions)
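    # Sketch (illustrative, not original source): _parse_simplified_pivot targets
    # DuckDB's statement-level form, e.g.
    #
    #   PIVOT cities ON year USING sum(population) GROUP BY country
    #
    # where ON, USING and GROUP BY are each optional; contrast this with the
    # relational PIVOT(... FOR ... IN (...)) form handled by _parse_pivot below.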
    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        while True:
            expressions = self._parse_csv(
                lambda: None
                if self._match(TokenType.ROLLUP, advance=False)
                else self._parse_conjunction()
            )
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())
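    # Example (hedged, not in the original module): _parse_group accumulates mixed
    # grouping elements into a single exp.Group, e.g.
    #
    #   GROUP BY a, ROLLUP (b, c)            -- expressions + rollup
    #   GROUP BY GROUPING SETS ((a), (a, b)) -- grouping_sets of exp.Tuple
    #   GROUP BY a WITH TOTALS               -- ClickHouse-style totals flag
    #
    # The WITH lookahead is retreated when it is not followed by ROLLUP/CUBE/TOTALS,
    # so WITH can still start a following clause.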
    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> exp.Alias:
        return self.expression(
            exp.Alias,
            alias=self._parse_id_var(any_token=True),
            this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
        )

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            interpolate=self._parse_interpolate(),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        this = parse_method() if parse_method else self._parse_conjunction()
        if not this:
            return None

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )
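    # Illustration (assumption-labeled, not original source): _parse_ordered resolves
    # implicit NULL ordering from the dialect. With NULL_ORDERING == "nulls_are_small",
    #
    #   ORDER BY x        ->  nulls_first=True   (ascending, small nulls sort first)
    #   ORDER BY x DESC   ->  nulls_first=False
    #
    # while an explicit NULLS FIRST/LAST always wins; ClickHouse's WITH FILL options
    # are captured in an exp.WithFill node when present.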
self._match_r_paren() 3834 else: 3835 expression = self._parse_term() 3836 3837 if self._match(TokenType.COMMA): 3838 offset = expression 3839 expression = self._parse_term() 3840 else: 3841 offset = None 3842 3843 limit_exp = self.expression( 3844 exp.Limit, 3845 this=this, 3846 expression=expression, 3847 offset=offset, 3848 comments=comments, 3849 expressions=self._parse_limit_by(), 3850 ) 3851 3852 return limit_exp 3853 3854 if self._match(TokenType.FETCH): 3855 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3856 direction = self._prev.text.upper() if direction else "FIRST" 3857 3858 count = self._parse_field(tokens=self.FETCH_TOKENS) 3859 percent = self._match(TokenType.PERCENT) 3860 3861 self._match_set((TokenType.ROW, TokenType.ROWS)) 3862 3863 only = self._match_text_seq("ONLY") 3864 with_ties = self._match_text_seq("WITH", "TIES") 3865 3866 if only and with_ties: 3867 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3868 3869 return self.expression( 3870 exp.Fetch, 3871 direction=direction, 3872 count=count, 3873 percent=percent, 3874 with_ties=with_ties, 3875 ) 3876 3877 return this 3878 3879 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3880 if not self._match(TokenType.OFFSET): 3881 return this 3882 3883 count = self._parse_term() 3884 self._match_set((TokenType.ROW, TokenType.ROWS)) 3885 3886 return self.expression( 3887 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3888 ) 3889 3890 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3891 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3892 3893 def _parse_locks(self) -> t.List[exp.Lock]: 3894 locks = [] 3895 while True: 3896 if self._match_text_seq("FOR", "UPDATE"): 3897 update = True 3898 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3899 "LOCK", "IN", "SHARE", "MODE" 3900 ): 3901 update = False 3902 else: 3903 break 3904 3905 expressions = None 3906 if self._match_text_seq("OF"): 3907 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3908 3909 wait: t.Optional[bool | exp.Expression] = None 3910 if self._match_text_seq("NOWAIT"): 3911 wait = True 3912 elif self._match_text_seq("WAIT"): 3913 wait = self._parse_primary() 3914 elif self._match_text_seq("SKIP", "LOCKED"): 3915 wait = False 3916 3917 locks.append( 3918 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3919 ) 3920 3921 return locks 3922 3923 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3924 while this and self._match_set(self.SET_OPERATIONS): 3925 token_type = self._prev.token_type 3926 3927 if token_type == TokenType.UNION: 3928 operation = exp.Union 3929 elif token_type == TokenType.EXCEPT: 3930 operation = exp.Except 3931 else: 3932 operation = exp.Intersect 3933 3934 comments = self._prev.comments 3935 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3936 by_name = self._match_text_seq("BY", "NAME") 3937 expression = self._parse_select(nested=True, parse_set_operation=False) 3938 3939 this = self.expression( 3940 operation, 3941 comments=comments, 3942 this=this, 3943 distinct=distinct, 3944 by_name=by_name, 3945 expression=expression, 3946 ) 3947 3948 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3949 expression = this.expression 3950 3951 if expression: 3952 for arg in self.UNION_MODIFIERS: 3953 expr = expression.args.get(arg) 3954 
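                        # Move the modifier (e.g. ORDER BY / LIMIT) off the right-most SELECT and
                        # attach it to the Union itself, so that in SELECT ... UNION SELECT ... LIMIT 1
                        # the LIMIT applies to the whole set operation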
if expr: 3955 this.set(arg, expr.pop()) 3956 3957 return this 3958 3959 def _parse_expression(self) -> t.Optional[exp.Expression]: 3960 return self._parse_alias(self._parse_conjunction()) 3961 3962 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3963 this = self._parse_equality() 3964 3965 if self._match(TokenType.COLON_EQ): 3966 this = self.expression( 3967 exp.PropertyEQ, 3968 this=this, 3969 comments=self._prev_comments, 3970 expression=self._parse_conjunction(), 3971 ) 3972 3973 while self._match_set(self.CONJUNCTION): 3974 this = self.expression( 3975 self.CONJUNCTION[self._prev.token_type], 3976 this=this, 3977 comments=self._prev_comments, 3978 expression=self._parse_equality(), 3979 ) 3980 return this 3981 3982 def _parse_equality(self) -> t.Optional[exp.Expression]: 3983 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3984 3985 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3986 return self._parse_tokens(self._parse_range, self.COMPARISON) 3987 3988 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3989 this = this or self._parse_bitwise() 3990 negate = self._match(TokenType.NOT) 3991 3992 if self._match_set(self.RANGE_PARSERS): 3993 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3994 if not expression: 3995 return this 3996 3997 this = expression 3998 elif self._match(TokenType.ISNULL): 3999 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4000 4001 # Postgres supports ISNULL and NOTNULL for conditions. 4002 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4003 if self._match(TokenType.NOTNULL): 4004 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4005 this = self.expression(exp.Not, this=this) 4006 4007 if negate: 4008 this = self.expression(exp.Not, this=this) 4009 4010 if self._match(TokenType.IS): 4011 this = self._parse_is(this) 4012 4013 return this 4014 4015 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4016 index = self._index - 1 4017 negate = self._match(TokenType.NOT) 4018 4019 if self._match_text_seq("DISTINCT", "FROM"): 4020 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4021 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4022 4023 expression = self._parse_null() or self._parse_boolean() 4024 if not expression: 4025 self._retreat(index) 4026 return None 4027 4028 this = self.expression(exp.Is, this=this, expression=expression) 4029 return self.expression(exp.Not, this=this) if negate else this 4030 4031 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4032 unnest = self._parse_unnest(with_alias=False) 4033 if unnest: 4034 this = self.expression(exp.In, this=this, unnest=unnest) 4035 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4036 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4037 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4038 4039 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4040 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4041 else: 4042 this = self.expression(exp.In, this=this, expressions=expressions) 4043 4044 if matched_l_paren: 4045 self._match_r_paren(this) 4046 elif not self._match(TokenType.R_BRACKET, expression=this): 4047 self.raise_error("Expecting ]") 4048 else: 4049 this = self.expression(exp.In, this=this, field=self._parse_field()) 4050 4051 return this 
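    # Illustration (not part of the parser): a minimal sketch of how the IN forms
    # handled above surface through sqlglot's public API. `parse_one` and
    # `Expression.args` are the standard entry points; the arg names come from the
    # expressions built in _parse_in:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     # The list form populates the "expressions" arg
    #     assert sqlglot.parse_one("x IN (1, 2)").find(exp.In).args.get("expressions")
    #
    #     # A single subquery is wrapped and stored in the "query" arg instead
    #     assert sqlglot.parse_one("x IN (SELECT y FROM t)").find(exp.In).args.get("query")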
4052 4053 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4054 low = self._parse_bitwise() 4055 self._match(TokenType.AND) 4056 high = self._parse_bitwise() 4057 return self.expression(exp.Between, this=this, low=low, high=high) 4058 4059 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4060 if not self._match(TokenType.ESCAPE): 4061 return this 4062 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4063 4064 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4065 index = self._index 4066 4067 if not self._match(TokenType.INTERVAL) and match_interval: 4068 return None 4069 4070 if self._match(TokenType.STRING, advance=False): 4071 this = self._parse_primary() 4072 else: 4073 this = self._parse_term() 4074 4075 if not this or ( 4076 isinstance(this, exp.Column) 4077 and not this.table 4078 and not this.this.quoted 4079 and this.name.upper() == "IS" 4080 ): 4081 self._retreat(index) 4082 return None 4083 4084 unit = self._parse_function() or ( 4085 not self._match(TokenType.ALIAS, advance=False) 4086 and self._parse_var(any_token=True, upper=True) 4087 ) 4088 4089 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4090 # each INTERVAL expression into this canonical form so it's easy to transpile 4091 if this and this.is_number: 4092 this = exp.Literal.string(this.name) 4093 elif this and this.is_string: 4094 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4095 if len(parts) == 1: 4096 if unit: 4097 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4098 self._retreat(self._index - 1) 4099 4100 this = exp.Literal.string(parts[0][0]) 4101 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4102 4103 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4104 unit = self.expression( 4105 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4106 ) 4107 4108 interval = self.expression(exp.Interval, this=this, unit=unit) 4109 4110 index = self._index 4111 self._match(TokenType.PLUS) 4112 4113 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 4114 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4115 return self.expression( 4116 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4117 ) 4118 4119 self._retreat(index) 4120 return interval 4121 4122 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4123 this = self._parse_term() 4124 4125 while True: 4126 if self._match_set(self.BITWISE): 4127 this = self.expression( 4128 self.BITWISE[self._prev.token_type], 4129 this=this, 4130 expression=self._parse_term(), 4131 ) 4132 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4133 this = self.expression( 4134 exp.DPipe, 4135 this=this, 4136 expression=self._parse_term(), 4137 safe=not self.dialect.STRICT_STRING_CONCAT, 4138 ) 4139 elif self._match(TokenType.DQMARK): 4140 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 4141 elif self._match_pair(TokenType.LT, TokenType.LT): 4142 this = self.expression( 4143 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4144 ) 4145 elif self._match_pair(TokenType.GT, TokenType.GT): 4146 this = self.expression( 4147 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4148 ) 4149 else: 4150 break 4151 4152 return this 4153 4154 def _parse_term(self) -> t.Optional[exp.Expression]: 4155 return self._parse_tokens(self._parse_factor, self.TERM) 4156 4157 def _parse_factor(self) -> t.Optional[exp.Expression]: 4158 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4159 this = parse_method() 4160 4161 while self._match_set(self.FACTOR): 4162 this = self.expression( 4163 self.FACTOR[self._prev.token_type], 4164 this=this, 4165 comments=self._prev_comments, 4166 expression=parse_method(), 4167 ) 4168 if isinstance(this, exp.Div): 4169 this.args["typed"] = self.dialect.TYPED_DIVISION 4170 this.args["safe"] = self.dialect.SAFE_DIVISION 4171 4172 return this 4173 4174 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4175 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4176 4177 def _parse_unary(self) -> t.Optional[exp.Expression]: 4178 if self._match_set(self.UNARY_PARSERS): 4179 return self.UNARY_PARSERS[self._prev.token_type](self) 4180 return self._parse_at_time_zone(self._parse_type()) 4181 4182 def _parse_type( 4183 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4184 ) -> t.Optional[exp.Expression]: 4185 interval = parse_interval and self._parse_interval() 4186 if interval: 4187 return interval 4188 4189 index = self._index 4190 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4191 4192 if data_type: 4193 index2 = self._index 4194 this = self._parse_primary() 4195 4196 if isinstance(this, exp.Literal): 4197 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4198 if parser: 4199 return parser(self, this, data_type) 4200 4201 return self.expression(exp.Cast, this=this, to=data_type) 4202 4203 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4204 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4205 # 4206 # If the index difference here is greater than 1, that means the parser itself must have 4207 # consumed additional tokens such as the DECIMAL scale and precision in the above example.
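            # (Concretely: for DECIMAL(38, 0) in the SQL text, _parse_types consumes all
            # six of the tokens shown above, so index2 - index is well past 1.)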
4208 # 4209 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4210 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4211 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4212 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 4213 # 4214 # In these cases, we don't really want to return the converted type, but instead retreat 4215 # and try to parse a Column or Identifier in the section below. 4216 if data_type.expressions and index2 - index > 1: 4217 self._retreat(index2) 4218 return self._parse_column_ops(data_type) 4219 4220 self._retreat(index) 4221 4222 if fallback_to_identifier: 4223 return self._parse_id_var() 4224 4225 this = self._parse_column() 4226 return this and self._parse_column_ops(this) 4227 4228 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4229 this = self._parse_type() 4230 if not this: 4231 return None 4232 4233 if isinstance(this, exp.Column) and not this.table: 4234 this = exp.var(this.name.upper()) 4235 4236 return self.expression( 4237 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4238 ) 4239 4240 def _parse_types( 4241 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4242 ) -> t.Optional[exp.Expression]: 4243 index = self._index 4244 4245 this: t.Optional[exp.Expression] = None 4246 prefix = self._match_text_seq("SYSUDTLIB", ".") 4247 4248 if not self._match_set(self.TYPE_TOKENS): 4249 identifier = allow_identifiers and self._parse_id_var( 4250 any_token=False, tokens=(TokenType.VAR,) 4251 ) 4252 if identifier: 4253 tokens = self.dialect.tokenize(identifier.name) 4254 4255 if len(tokens) != 1: 4256 self.raise_error("Unexpected identifier", self._prev) 4257 4258 if tokens[0].token_type in self.TYPE_TOKENS: 4259 self._prev = tokens[0] 4260 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4261 type_name = identifier.name 4262 4263 while self._match(TokenType.DOT): 4264 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4265 4266 this = exp.DataType.build(type_name, udt=True) 4267 else: 4268 self._retreat(self._index - 1) 4269 return None 4270 else: 4271 return None 4272 4273 type_token = self._prev.token_type 4274 4275 if type_token == TokenType.PSEUDO_TYPE: 4276 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4277 4278 if type_token == TokenType.OBJECT_IDENTIFIER: 4279 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4280 4281 nested = type_token in self.NESTED_TYPE_TOKENS 4282 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4283 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4284 expressions = None 4285 maybe_func = False 4286 4287 if self._match(TokenType.L_PAREN): 4288 if is_struct: 4289 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4290 elif nested: 4291 expressions = self._parse_csv( 4292 lambda: self._parse_types( 4293 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4294 ) 4295 ) 4296 elif type_token in self.ENUM_TYPE_TOKENS: 4297 expressions = self._parse_csv(self._parse_equality) 4298 elif is_aggregate: 4299 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4300 any_token=False, tokens=(TokenType.VAR,) 4301 ) 4302 if not func_or_ident or not self._match(TokenType.COMMA): 4303 return None 4304 expressions = self._parse_csv( 4305 lambda: self._parse_types( 4306 check_func=check_func,
schema=schema, allow_identifiers=allow_identifiers 4307 ) 4308 ) 4309 expressions.insert(0, func_or_ident) 4310 else: 4311 expressions = self._parse_csv(self._parse_type_size) 4312 4313 if not expressions or not self._match(TokenType.R_PAREN): 4314 self._retreat(index) 4315 return None 4316 4317 maybe_func = True 4318 4319 values: t.Optional[t.List[exp.Expression]] = None 4320 4321 if nested and self._match(TokenType.LT): 4322 if is_struct: 4323 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4324 else: 4325 expressions = self._parse_csv( 4326 lambda: self._parse_types( 4327 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4328 ) 4329 ) 4330 4331 if not self._match(TokenType.GT): 4332 self.raise_error("Expecting >") 4333 4334 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4335 values = self._parse_csv(self._parse_conjunction) 4336 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4337 4338 if type_token in self.TIMESTAMPS: 4339 if self._match_text_seq("WITH", "TIME", "ZONE"): 4340 maybe_func = False 4341 tz_type = ( 4342 exp.DataType.Type.TIMETZ 4343 if type_token in self.TIMES 4344 else exp.DataType.Type.TIMESTAMPTZ 4345 ) 4346 this = exp.DataType(this=tz_type, expressions=expressions) 4347 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4348 maybe_func = False 4349 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4350 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4351 maybe_func = False 4352 elif type_token == TokenType.INTERVAL: 4353 unit = self._parse_var(upper=True) 4354 if unit: 4355 if self._match_text_seq("TO"): 4356 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4357 4358 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4359 else: 4360 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4361 4362 if maybe_func and check_func: 4363 index2 = self._index 4364 peek = self._parse_string() 4365 4366 if not peek: 4367 self._retreat(index) 4368 return None 4369 4370 self._retreat(index2) 4371 4372 if not this: 4373 if self._match_text_seq("UNSIGNED"): 4374 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4375 if not unsigned_type_token: 4376 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4377 4378 type_token = unsigned_type_token or type_token 4379 4380 this = exp.DataType( 4381 this=exp.DataType.Type[type_token.value], 4382 expressions=expressions, 4383 nested=nested, 4384 values=values, 4385 prefix=prefix, 4386 ) 4387 elif expressions: 4388 this.set("expressions", expressions) 4389 4390 index = self._index 4391 4392 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4393 matched_array = self._match(TokenType.ARRAY) 4394 4395 while self._curr: 4396 matched_l_bracket = self._match(TokenType.L_BRACKET) 4397 if not matched_l_bracket and not matched_array: 4398 break 4399 4400 matched_array = False 4401 values = self._parse_csv(self._parse_conjunction) or None 4402 if values and not schema: 4403 self._retreat(index) 4404 break 4405 4406 this = exp.DataType( 4407 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4408 ) 4409 self._match(TokenType.R_BRACKET) 4410 4411 if self.TYPE_CONVERTER and isinstance(this.this, exp.DataType.Type): 4412 converter = self.TYPE_CONVERTER.get(this.this) 4413 if converter: 4414 this = converter(t.cast(exp.DataType, this)) 4415 4416 return this 4417 4418 def 
_parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4419 index = self._index 4420 this = ( 4421 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4422 or self._parse_id_var() 4423 ) 4424 self._match(TokenType.COLON) 4425 4426 if ( 4427 type_required 4428 and not isinstance(this, exp.DataType) 4429 and not self._match_set(self.TYPE_TOKENS, advance=False) 4430 ): 4431 self._retreat(index) 4432 return self._parse_types() 4433 4434 return self._parse_column_def(this) 4435 4436 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4437 if not self._match_text_seq("AT", "TIME", "ZONE"): 4438 return this 4439 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4440 4441 def _parse_column(self) -> t.Optional[exp.Expression]: 4442 this = self._parse_column_reference() 4443 return self._parse_column_ops(this) if this else self._parse_bracket(this) 4444 4445 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4446 this = self._parse_field() 4447 if ( 4448 not this 4449 and self._match(TokenType.VALUES, advance=False) 4450 and self.VALUES_FOLLOWED_BY_PAREN 4451 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4452 ): 4453 this = self._parse_id_var() 4454 4455 if isinstance(this, exp.Identifier): 4456 # We bubble up comments from the Identifier to the Column 4457 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4458 4459 return this 4460 4461 def _parse_colon_as_json_extract( 4462 self, this: t.Optional[exp.Expression] 4463 ) -> t.Optional[exp.Expression]: 4464 casts = [] 4465 json_path = [] 4466 4467 while self._match(TokenType.COLON): 4468 start_index = self._index 4469 path = self._parse_column_ops(self._parse_field(any_token=True)) 4470 4471 # The cast :: operator has a lower precedence than the extraction operator :, so 4472 # we rearrange the AST appropriately to avoid casting the JSON path 4473 while isinstance(path, exp.Cast): 4474 casts.append(path.to) 4475 path = path.this 4476 4477 if casts: 4478 dcolon_offset = next( 4479 i 4480 for i, t in enumerate(self._tokens[start_index:]) 4481 if t.token_type == TokenType.DCOLON 4482 ) 4483 end_token = self._tokens[start_index + dcolon_offset - 1] 4484 else: 4485 end_token = self._prev 4486 4487 if path: 4488 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 4489 4490 if json_path: 4491 this = self.expression( 4492 exp.JSONExtract, 4493 this=this, 4494 expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))), 4495 ) 4496 4497 while casts: 4498 this = self.expression(exp.Cast, this=this, to=casts.pop()) 4499 4500 return this 4501 4502 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4503 this = self._parse_bracket(this) 4504 4505 while self._match_set(self.COLUMN_OPERATORS): 4506 op_token = self._prev.token_type 4507 op = self.COLUMN_OPERATORS.get(op_token) 4508 4509 if op_token == TokenType.DCOLON: 4510 field = self._parse_types() 4511 if not field: 4512 self.raise_error("Expected type") 4513 elif op and self._curr: 4514 field = self._parse_column_reference() 4515 else: 4516 field = self._parse_field(any_token=True, anonymous_func=True) 4517 4518 if isinstance(field, exp.Func) and this: 4519 # bigquery allows function calls like x.y.count(...) 4520 # SAFE.SUBSTR(...) 
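                # In that case the dotted chain to the left of the call is folded into a
                # Dot tree below (each Column node becomes Dot(table, column)), leaving the
                # function call as the right-most operand of the chain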
4521 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4522 this = exp.replace_tree( 4523 this, 4524 lambda n: ( 4525 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4526 if n.table 4527 else n.this 4528 ) 4529 if isinstance(n, exp.Column) 4530 else n, 4531 ) 4532 4533 if op: 4534 this = op(self, this, field) 4535 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4536 this = self.expression( 4537 exp.Column, 4538 this=field, 4539 table=this.this, 4540 db=this.args.get("table"), 4541 catalog=this.args.get("db"), 4542 ) 4543 else: 4544 this = self.expression(exp.Dot, this=this, expression=field) 4545 4546 this = self._parse_bracket(this) 4547 4548 return self._parse_colon_as_json_extract(this) if self.COLON_IS_JSON_EXTRACT else this 4549 4550 def _parse_primary(self) -> t.Optional[exp.Expression]: 4551 if self._match_set(self.PRIMARY_PARSERS): 4552 token_type = self._prev.token_type 4553 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4554 4555 if token_type == TokenType.STRING: 4556 expressions = [primary] 4557 while self._match(TokenType.STRING): 4558 expressions.append(exp.Literal.string(self._prev.text)) 4559 4560 if len(expressions) > 1: 4561 return self.expression(exp.Concat, expressions=expressions) 4562 4563 return primary 4564 4565 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4566 return exp.Literal.number(f"0.{self._prev.text}") 4567 4568 if self._match(TokenType.L_PAREN): 4569 comments = self._prev_comments 4570 query = self._parse_select() 4571 4572 if query: 4573 expressions = [query] 4574 else: 4575 expressions = self._parse_expressions() 4576 4577 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4578 4579 if not this and self._match(TokenType.R_PAREN, advance=False): 4580 this = self.expression(exp.Tuple) 4581 elif isinstance(this, exp.UNWRAPPED_QUERIES): 4582 this = self._parse_subquery(this=this, parse_alias=False) 4583 elif isinstance(this, exp.Subquery): 4584 this = self._parse_subquery( 4585 this=self._parse_set_operations(this), parse_alias=False 4586 ) 4587 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 4588 this = self.expression(exp.Tuple, expressions=expressions) 4589 else: 4590 this = self.expression(exp.Paren, this=this) 4591 4592 if this: 4593 this.add_comments(comments) 4594 4595 self._match_r_paren(expression=this) 4596 return this 4597 4598 return None 4599 4600 def _parse_field( 4601 self, 4602 any_token: bool = False, 4603 tokens: t.Optional[t.Collection[TokenType]] = None, 4604 anonymous_func: bool = False, 4605 ) -> t.Optional[exp.Expression]: 4606 if anonymous_func: 4607 field = ( 4608 self._parse_function(anonymous=anonymous_func, any_token=any_token) 4609 or self._parse_primary() 4610 ) 4611 else: 4612 field = self._parse_primary() or self._parse_function( 4613 anonymous=anonymous_func, any_token=any_token 4614 ) 4615 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 4616 4617 def _parse_function( 4618 self, 4619 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4620 anonymous: bool = False, 4621 optional_parens: bool = True, 4622 any_token: bool = False, 4623 ) -> t.Optional[exp.Expression]: 4624 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4625 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4626 fn_syntax = False 4627 if ( 4628 self._match(TokenType.L_BRACE, advance=False) 4629 and self._next 4630 and 
self._next.text.upper() == "FN" 4631 ): 4632 self._advance(2) 4633 fn_syntax = True 4634 4635 func = self._parse_function_call( 4636 functions=functions, 4637 anonymous=anonymous, 4638 optional_parens=optional_parens, 4639 any_token=any_token, 4640 ) 4641 4642 if fn_syntax: 4643 self._match(TokenType.R_BRACE) 4644 4645 return func 4646 4647 def _parse_function_call( 4648 self, 4649 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4650 anonymous: bool = False, 4651 optional_parens: bool = True, 4652 any_token: bool = False, 4653 ) -> t.Optional[exp.Expression]: 4654 if not self._curr: 4655 return None 4656 4657 comments = self._curr.comments 4658 token_type = self._curr.token_type 4659 this = self._curr.text 4660 upper = this.upper() 4661 4662 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4663 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4664 self._advance() 4665 return self._parse_window(parser(self)) 4666 4667 if not self._next or self._next.token_type != TokenType.L_PAREN: 4668 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4669 self._advance() 4670 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4671 4672 return None 4673 4674 if any_token: 4675 if token_type in self.RESERVED_TOKENS: 4676 return None 4677 elif token_type not in self.FUNC_TOKENS: 4678 return None 4679 4680 self._advance(2) 4681 4682 parser = self.FUNCTION_PARSERS.get(upper) 4683 if parser and not anonymous: 4684 this = parser(self) 4685 else: 4686 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4687 4688 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4689 this = self.expression(subquery_predicate, this=self._parse_select()) 4690 self._match_r_paren() 4691 return this 4692 4693 if functions is None: 4694 functions = self.FUNCTIONS 4695 4696 function = functions.get(upper) 4697 4698 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4699 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4700 4701 if alias: 4702 args = self._kv_to_prop_eq(args) 4703 4704 if function and not anonymous: 4705 if "dialect" in function.__code__.co_varnames: 4706 func = function(args, dialect=self.dialect) 4707 else: 4708 func = function(args) 4709 4710 func = self.validate_expression(func, args) 4711 if not self.dialect.NORMALIZE_FUNCTIONS: 4712 func.meta["name"] = this 4713 4714 this = func 4715 else: 4716 if token_type == TokenType.IDENTIFIER: 4717 this = exp.Identifier(this=this, quoted=True) 4718 this = self.expression(exp.Anonymous, this=this, expressions=args) 4719 4720 if isinstance(this, exp.Expression): 4721 this.add_comments(comments) 4722 4723 self._match_r_paren(this) 4724 return self._parse_window(this) 4725 4726 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4727 transformed = [] 4728 4729 for e in expressions: 4730 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4731 if isinstance(e, exp.Alias): 4732 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4733 4734 if not isinstance(e, exp.PropertyEQ): 4735 e = self.expression( 4736 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 4737 ) 4738 4739 if isinstance(e.this, exp.Column): 4740 e.this.replace(e.this.this) 4741 4742 transformed.append(e) 4743 4744 return transformed 4745 4746 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4747 return self._parse_column_def(self._parse_id_var()) 4748 4749 def _parse_user_defined_function( 
4750 self, kind: t.Optional[TokenType] = None 4751 ) -> t.Optional[exp.Expression]: 4752 this = self._parse_id_var() 4753 4754 while self._match(TokenType.DOT): 4755 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4756 4757 if not self._match(TokenType.L_PAREN): 4758 return this 4759 4760 expressions = self._parse_csv(self._parse_function_parameter) 4761 self._match_r_paren() 4762 return self.expression( 4763 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4764 ) 4765 4766 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4767 literal = self._parse_primary() 4768 if literal: 4769 return self.expression(exp.Introducer, this=token.text, expression=literal) 4770 4771 return self.expression(exp.Identifier, this=token.text) 4772 4773 def _parse_session_parameter(self) -> exp.SessionParameter: 4774 kind = None 4775 this = self._parse_id_var() or self._parse_primary() 4776 4777 if this and self._match(TokenType.DOT): 4778 kind = this.name 4779 this = self._parse_var() or self._parse_primary() 4780 4781 return self.expression(exp.SessionParameter, this=this, kind=kind) 4782 4783 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 4784 return self._parse_id_var() 4785 4786 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4787 index = self._index 4788 4789 if self._match(TokenType.L_PAREN): 4790 expressions = t.cast( 4791 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 4792 ) 4793 4794 if not self._match(TokenType.R_PAREN): 4795 self._retreat(index) 4796 else: 4797 expressions = [self._parse_lambda_arg()] 4798 4799 if self._match_set(self.LAMBDAS): 4800 return self.LAMBDAS[self._prev.token_type](self, expressions) 4801 4802 self._retreat(index) 4803 4804 this: t.Optional[exp.Expression] 4805 4806 if self._match(TokenType.DISTINCT): 4807 this = self.expression( 4808 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 4809 ) 4810 else: 4811 this = self._parse_select_or_expression(alias=alias) 4812 4813 return self._parse_limit( 4814 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4815 ) 4816 4817 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4818 index = self._index 4819 if not self._match(TokenType.L_PAREN): 4820 return this 4821 4822 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 4823 # expr can be of both types 4824 if self._match_set(self.SELECT_START_TOKENS): 4825 self._retreat(index) 4826 return this 4827 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4828 self._match_r_paren() 4829 return self.expression(exp.Schema, this=this, expressions=args) 4830 4831 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4832 return self._parse_column_def(self._parse_field(any_token=True)) 4833 4834 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4835 # column defs are not really columns, they're identifiers 4836 if isinstance(this, exp.Column): 4837 this = this.this 4838 4839 kind = self._parse_types(schema=True) 4840 4841 if self._match_text_seq("FOR", "ORDINALITY"): 4842 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4843 4844 constraints: t.List[exp.Expression] = [] 4845 4846 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 4847 ("ALIAS", "MATERIALIZED") 4848 ): 4849 persisted = self._prev.text.upper() == "MATERIALIZED" 4850 constraints.append( 4851 self.expression( 4852 exp.ComputedColumnConstraint, 4853 this=self._parse_conjunction(), 4854 persisted=persisted or self._match_text_seq("PERSISTED"), 4855 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4856 ) 4857 ) 4858 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4859 self._match(TokenType.ALIAS) 4860 constraints.append( 4861 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4862 ) 4863 4864 while True: 4865 constraint = self._parse_column_constraint() 4866 if not constraint: 4867 break 4868 constraints.append(constraint) 4869 4870 if not kind and not constraints: 4871 return this 4872 4873 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4874 4875 def _parse_auto_increment( 4876 self, 4877 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4878 start = None 4879 increment = None 4880 4881 if self._match(TokenType.L_PAREN, advance=False): 4882 args = self._parse_wrapped_csv(self._parse_bitwise) 4883 start = seq_get(args, 0) 4884 increment = seq_get(args, 1) 4885 elif self._match_text_seq("START"): 4886 start = self._parse_bitwise() 4887 self._match_text_seq("INCREMENT") 4888 increment = self._parse_bitwise() 4889 4890 if start and increment: 4891 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4892 4893 return exp.AutoIncrementColumnConstraint() 4894 4895 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4896 if not self._match_text_seq("REFRESH"): 4897 self._retreat(self._index - 1) 4898 return None 4899 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4900 4901 def _parse_compress(self) -> exp.CompressColumnConstraint: 4902 if self._match(TokenType.L_PAREN, advance=False): 4903 return self.expression( 4904 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4905 ) 4906 4907 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4908 4909 def _parse_generated_as_identity( 4910 self, 4911 ) -> ( 4912 exp.GeneratedAsIdentityColumnConstraint 4913 | exp.ComputedColumnConstraint 4914 | exp.GeneratedAsRowColumnConstraint 4915 ): 4916 if self._match_text_seq("BY", "DEFAULT"): 4917 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4918 this = self.expression( 4919 
exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4920 ) 4921 else: 4922 self._match_text_seq("ALWAYS") 4923 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4924 4925 self._match(TokenType.ALIAS) 4926 4927 if self._match_text_seq("ROW"): 4928 start = self._match_text_seq("START") 4929 if not start: 4930 self._match(TokenType.END) 4931 hidden = self._match_text_seq("HIDDEN") 4932 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4933 4934 identity = self._match_text_seq("IDENTITY") 4935 4936 if self._match(TokenType.L_PAREN): 4937 if self._match(TokenType.START_WITH): 4938 this.set("start", self._parse_bitwise()) 4939 if self._match_text_seq("INCREMENT", "BY"): 4940 this.set("increment", self._parse_bitwise()) 4941 if self._match_text_seq("MINVALUE"): 4942 this.set("minvalue", self._parse_bitwise()) 4943 if self._match_text_seq("MAXVALUE"): 4944 this.set("maxvalue", self._parse_bitwise()) 4945 4946 if self._match_text_seq("CYCLE"): 4947 this.set("cycle", True) 4948 elif self._match_text_seq("NO", "CYCLE"): 4949 this.set("cycle", False) 4950 4951 if not identity: 4952 this.set("expression", self._parse_range()) 4953 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4954 args = self._parse_csv(self._parse_bitwise) 4955 this.set("start", seq_get(args, 0)) 4956 this.set("increment", seq_get(args, 1)) 4957 4958 self._match_r_paren() 4959 4960 return this 4961 4962 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4963 self._match_text_seq("LENGTH") 4964 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4965 4966 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 4967 if self._match_text_seq("NULL"): 4968 return self.expression(exp.NotNullColumnConstraint) 4969 if self._match_text_seq("CASESPECIFIC"): 4970 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4971 if self._match_text_seq("FOR", "REPLICATION"): 4972 return self.expression(exp.NotForReplicationColumnConstraint) 4973 return None 4974 4975 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4976 if self._match(TokenType.CONSTRAINT): 4977 this = self._parse_id_var() 4978 else: 4979 this = None 4980 4981 if self._match_texts(self.CONSTRAINT_PARSERS): 4982 return self.expression( 4983 exp.ColumnConstraint, 4984 this=this, 4985 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4986 ) 4987 4988 return this 4989 4990 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4991 if not self._match(TokenType.CONSTRAINT): 4992 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4993 4994 return self.expression( 4995 exp.Constraint, 4996 this=self._parse_id_var(), 4997 expressions=self._parse_unnamed_constraints(), 4998 ) 4999 5000 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5001 constraints = [] 5002 while True: 5003 constraint = self._parse_unnamed_constraint() or self._parse_function() 5004 if not constraint: 5005 break 5006 constraints.append(constraint) 5007 5008 return constraints 5009 5010 def _parse_unnamed_constraint( 5011 self, constraints: t.Optional[t.Collection[str]] = None 5012 ) -> t.Optional[exp.Expression]: 5013 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5014 constraints or self.CONSTRAINT_PARSERS 5015 ): 5016 return None 5017 5018 constraint = self._prev.text.upper() 5019 if constraint not in self.CONSTRAINT_PARSERS: 5020 
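            # The keyword was matched via a dialect-provided `constraints` collection but
            # has no registered parser, so fail loudly rather than mis-parse the schema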
self.raise_error(f"No parser found for schema constraint {constraint}.") 5021 5022 return self.CONSTRAINT_PARSERS[constraint](self) 5023 5024 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5025 self._match_text_seq("KEY") 5026 return self.expression( 5027 exp.UniqueColumnConstraint, 5028 this=self._parse_schema(self._parse_id_var(any_token=False)), 5029 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5030 on_conflict=self._parse_on_conflict(), 5031 ) 5032 5033 def _parse_key_constraint_options(self) -> t.List[str]: 5034 options = [] 5035 while True: 5036 if not self._curr: 5037 break 5038 5039 if self._match(TokenType.ON): 5040 action = None 5041 on = self._advance_any() and self._prev.text 5042 5043 if self._match_text_seq("NO", "ACTION"): 5044 action = "NO ACTION" 5045 elif self._match_text_seq("CASCADE"): 5046 action = "CASCADE" 5047 elif self._match_text_seq("RESTRICT"): 5048 action = "RESTRICT" 5049 elif self._match_pair(TokenType.SET, TokenType.NULL): 5050 action = "SET NULL" 5051 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5052 action = "SET DEFAULT" 5053 else: 5054 self.raise_error("Invalid key constraint") 5055 5056 options.append(f"ON {on} {action}") 5057 elif self._match_text_seq("NOT", "ENFORCED"): 5058 options.append("NOT ENFORCED") 5059 elif self._match_text_seq("DEFERRABLE"): 5060 options.append("DEFERRABLE") 5061 elif self._match_text_seq("INITIALLY", "DEFERRED"): 5062 options.append("INITIALLY DEFERRED") 5063 elif self._match_text_seq("NORELY"): 5064 options.append("NORELY") 5065 elif self._match_text_seq("MATCH", "FULL"): 5066 options.append("MATCH FULL") 5067 else: 5068 break 5069 5070 return options 5071 5072 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5073 if match and not self._match(TokenType.REFERENCES): 5074 return None 5075 5076 expressions = None 5077 this = self._parse_table(schema=True) 5078 options = self._parse_key_constraint_options() 5079 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5080 5081 def _parse_foreign_key(self) -> exp.ForeignKey: 5082 expressions = self._parse_wrapped_id_vars() 5083 reference = self._parse_references() 5084 options = {} 5085 5086 while self._match(TokenType.ON): 5087 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5088 self.raise_error("Expected DELETE or UPDATE") 5089 5090 kind = self._prev.text.lower() 5091 5092 if self._match_text_seq("NO", "ACTION"): 5093 action = "NO ACTION" 5094 elif self._match(TokenType.SET): 5095 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5096 action = "SET " + self._prev.text.upper() 5097 else: 5098 self._advance() 5099 action = self._prev.text.upper() 5100 5101 options[kind] = action 5102 5103 return self.expression( 5104 exp.ForeignKey, 5105 expressions=expressions, 5106 reference=reference, 5107 **options, # type: ignore 5108 ) 5109 5110 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5111 return self._parse_field() 5112 5113 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5114 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5115 self._retreat(self._index - 1) 5116 return None 5117 5118 id_vars = self._parse_wrapped_id_vars() 5119 return self.expression( 5120 exp.PeriodForSystemTimeConstraint, 5121 this=seq_get(id_vars, 0), 5122 expression=seq_get(id_vars, 1), 5123 ) 5124 5125 def _parse_primary_key( 5126 self, wrapped_optional: bool = False, in_props: bool = False 5127 ) -> 
exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5128 desc = ( 5129 self._match_set((TokenType.ASC, TokenType.DESC)) 5130 and self._prev.token_type == TokenType.DESC 5131 ) 5132 5133 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5134 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5135 5136 expressions = self._parse_wrapped_csv( 5137 self._parse_primary_key_part, optional=wrapped_optional 5138 ) 5139 options = self._parse_key_constraint_options() 5140 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5141 5142 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5143 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 5144 5145 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5146 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5147 return this 5148 5149 bracket_kind = self._prev.token_type 5150 expressions = self._parse_csv( 5151 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5152 ) 5153 5154 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5155 self.raise_error("Expected ]") 5156 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5157 self.raise_error("Expected }") 5158 5159 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5160 if bracket_kind == TokenType.L_BRACE: 5161 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5162 elif not this or this.name.upper() == "ARRAY": 5163 this = self.expression(exp.Array, expressions=expressions) 5164 else: 5165 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5166 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5167 5168 self._add_comments(this) 5169 return self._parse_bracket(this) 5170 5171 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5172 if self._match(TokenType.COLON): 5173 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 5174 return this 5175 5176 def _parse_case(self) -> t.Optional[exp.Expression]: 5177 ifs = [] 5178 default = None 5179 5180 comments = self._prev_comments 5181 expression = self._parse_conjunction() 5182 5183 while self._match(TokenType.WHEN): 5184 this = self._parse_conjunction() 5185 self._match(TokenType.THEN) 5186 then = self._parse_conjunction() 5187 ifs.append(self.expression(exp.If, this=this, true=then)) 5188 5189 if self._match(TokenType.ELSE): 5190 default = self._parse_conjunction() 5191 5192 if not self._match(TokenType.END): 5193 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5194 default = exp.column("interval") 5195 else: 5196 self.raise_error("Expected END after CASE", self._prev) 5197 5198 return self.expression( 5199 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5200 ) 5201 5202 def _parse_if(self) -> t.Optional[exp.Expression]: 5203 if self._match(TokenType.L_PAREN): 5204 args = self._parse_csv(self._parse_conjunction) 5205 this = self.validate_expression(exp.If.from_arg_list(args), args) 5206 self._match_r_paren() 5207 else: 5208 index = self._index - 1 5209 5210 if self.NO_PAREN_IF_COMMANDS and index == 0: 5211 return self._parse_as_command(self._prev) 5212 5213 condition = self._parse_conjunction() 5214 5215 if not condition: 5216 self._retreat(index) 5217 return None 5218 
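            # No-paren form: IF <condition> THEN <true> [ELSE <false>] END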
5219 self._match(TokenType.THEN) 5220 true = self._parse_conjunction() 5221 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 5222 self._match(TokenType.END) 5223 this = self.expression(exp.If, this=condition, true=true, false=false) 5224 5225 return this 5226 5227 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5228 if not self._match_text_seq("VALUE", "FOR"): 5229 self._retreat(self._index - 1) 5230 return None 5231 5232 return self.expression( 5233 exp.NextValueFor, 5234 this=self._parse_column(), 5235 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5236 ) 5237 5238 def _parse_extract(self) -> exp.Extract: 5239 this = self._parse_function() or self._parse_var() or self._parse_type() 5240 5241 if self._match(TokenType.FROM): 5242 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5243 5244 if not self._match(TokenType.COMMA): 5245 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5246 5247 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5248 5249 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5250 this = self._parse_conjunction() 5251 5252 if not self._match(TokenType.ALIAS): 5253 if self._match(TokenType.COMMA): 5254 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5255 5256 self.raise_error("Expected AS after CAST") 5257 5258 fmt = None 5259 to = self._parse_types() 5260 5261 if self._match(TokenType.FORMAT): 5262 fmt_string = self._parse_string() 5263 fmt = self._parse_at_time_zone(fmt_string) 5264 5265 if not to: 5266 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5267 if to.this in exp.DataType.TEMPORAL_TYPES: 5268 this = self.expression( 5269 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5270 this=this, 5271 format=exp.Literal.string( 5272 format_time( 5273 fmt_string.this if fmt_string else "", 5274 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5275 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5276 ) 5277 ), 5278 ) 5279 5280 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5281 this.set("zone", fmt.args["zone"]) 5282 return this 5283 elif not to: 5284 self.raise_error("Expected TYPE after CAST") 5285 elif isinstance(to, exp.Identifier): 5286 to = exp.DataType.build(to.name, udt=True) 5287 elif to.this == exp.DataType.Type.CHAR: 5288 if self._match(TokenType.CHARACTER_SET): 5289 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5290 5291 return self.expression( 5292 exp.Cast if strict else exp.TryCast, 5293 this=this, 5294 to=to, 5295 format=fmt, 5296 safe=safe, 5297 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5298 ) 5299 5300 def _parse_string_agg(self) -> exp.Expression: 5301 if self._match(TokenType.DISTINCT): 5302 args: t.List[t.Optional[exp.Expression]] = [ 5303 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 5304 ] 5305 if self._match(TokenType.COMMA): 5306 args.extend(self._parse_csv(self._parse_conjunction)) 5307 else: 5308 args = self._parse_csv(self._parse_conjunction) # type: ignore 5309 5310 index = self._index 5311 if not self._match(TokenType.R_PAREN) and args: 5312 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5313 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... 
]] [LIMIT n]) 5314 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5315 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5316 5317 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5318 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5319 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5320 if not self._match_text_seq("WITHIN", "GROUP"): 5321 self._retreat(index) 5322 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5323 5324 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5325 order = self._parse_order(this=seq_get(args, 0)) 5326 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5327 5328 def _parse_convert( 5329 self, strict: bool, safe: t.Optional[bool] = None 5330 ) -> t.Optional[exp.Expression]: 5331 this = self._parse_bitwise() 5332 5333 if self._match(TokenType.USING): 5334 to: t.Optional[exp.Expression] = self.expression( 5335 exp.CharacterSet, this=self._parse_var() 5336 ) 5337 elif self._match(TokenType.COMMA): 5338 to = self._parse_types() 5339 else: 5340 to = None 5341 5342 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5343 5344 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5345 """ 5346 There are generally two variants of the DECODE function: 5347 5348 - DECODE(bin, charset) 5349 - DECODE(expression, search, result [, search, result] ... [, default]) 5350 5351 The second variant will always be parsed into a CASE expression. Note that NULL 5352 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5353 instead of relying on pattern matching. 
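        For example, DECODE(x, 1, 'a', NULL, 'b', 'c') is parsed as the equivalent of
        CASE WHEN x = 1 THEN 'a' WHEN x IS NULL THEN 'b' ELSE 'c' END.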
5354 """ 5355 args = self._parse_csv(self._parse_conjunction) 5356 5357 if len(args) < 3: 5358 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5359 5360 expression, *expressions = args 5361 if not expression: 5362 return None 5363 5364 ifs = [] 5365 for search, result in zip(expressions[::2], expressions[1::2]): 5366 if not search or not result: 5367 return None 5368 5369 if isinstance(search, exp.Literal): 5370 ifs.append( 5371 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5372 ) 5373 elif isinstance(search, exp.Null): 5374 ifs.append( 5375 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5376 ) 5377 else: 5378 cond = exp.or_( 5379 exp.EQ(this=expression.copy(), expression=search), 5380 exp.and_( 5381 exp.Is(this=expression.copy(), expression=exp.Null()), 5382 exp.Is(this=search.copy(), expression=exp.Null()), 5383 copy=False, 5384 ), 5385 copy=False, 5386 ) 5387 ifs.append(exp.If(this=cond, true=result)) 5388 5389 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5390 5391 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5392 self._match_text_seq("KEY") 5393 key = self._parse_column() 5394 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5395 self._match_text_seq("VALUE") 5396 value = self._parse_bitwise() 5397 5398 if not key and not value: 5399 return None 5400 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5401 5402 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5403 if not this or not self._match_text_seq("FORMAT", "JSON"): 5404 return this 5405 5406 return self.expression(exp.FormatJson, this=this) 5407 5408 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5409 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5410 for value in values: 5411 if self._match_text_seq(value, "ON", on): 5412 return f"{value} ON {on}" 5413 5414 return None 5415 5416 @t.overload 5417 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5418 5419 @t.overload 5420 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
5421 5422 def _parse_json_object(self, agg=False): 5423 star = self._parse_star() 5424 expressions = ( 5425 [star] 5426 if star 5427 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5428 ) 5429 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5430 5431 unique_keys = None 5432 if self._match_text_seq("WITH", "UNIQUE"): 5433 unique_keys = True 5434 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5435 unique_keys = False 5436 5437 self._match_text_seq("KEYS") 5438 5439 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5440 self._parse_type() 5441 ) 5442 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5443 5444 return self.expression( 5445 exp.JSONObjectAgg if agg else exp.JSONObject, 5446 expressions=expressions, 5447 null_handling=null_handling, 5448 unique_keys=unique_keys, 5449 return_type=return_type, 5450 encoding=encoding, 5451 ) 5452 5453 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5454 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5455 if not self._match_text_seq("NESTED"): 5456 this = self._parse_id_var() 5457 kind = self._parse_types(allow_identifiers=False) 5458 nested = None 5459 else: 5460 this = None 5461 kind = None 5462 nested = True 5463 5464 path = self._match_text_seq("PATH") and self._parse_string() 5465 nested_schema = nested and self._parse_json_schema() 5466 5467 return self.expression( 5468 exp.JSONColumnDef, 5469 this=this, 5470 kind=kind, 5471 path=path, 5472 nested_schema=nested_schema, 5473 ) 5474 5475 def _parse_json_schema(self) -> exp.JSONSchema: 5476 self._match_text_seq("COLUMNS") 5477 return self.expression( 5478 exp.JSONSchema, 5479 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5480 ) 5481 5482 def _parse_json_table(self) -> exp.JSONTable: 5483 this = self._parse_format_json(self._parse_bitwise()) 5484 path = self._match(TokenType.COMMA) and self._parse_string() 5485 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5486 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5487 schema = self._parse_json_schema() 5488 5489 return exp.JSONTable( 5490 this=this, 5491 schema=schema, 5492 path=path, 5493 error_handling=error_handling, 5494 empty_handling=empty_handling, 5495 ) 5496 5497 def _parse_match_against(self) -> exp.MatchAgainst: 5498 expressions = self._parse_csv(self._parse_column) 5499 5500 self._match_text_seq(")", "AGAINST", "(") 5501 5502 this = self._parse_string() 5503 5504 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5505 modifier = "IN NATURAL LANGUAGE MODE" 5506 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5507 modifier = f"{modifier} WITH QUERY EXPANSION" 5508 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5509 modifier = "IN BOOLEAN MODE" 5510 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5511 modifier = "WITH QUERY EXPANSION" 5512 else: 5513 modifier = None 5514 5515 return self.expression( 5516 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5517 ) 5518 5519 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5520 def _parse_open_json(self) -> exp.OpenJSON: 5521 this = self._parse_bitwise() 5522 path = self._match(TokenType.COMMA) and self._parse_string() 5523 5524 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5525 this = self._parse_field(any_token=True) 5526 kind = self._parse_types() 5527 path = 
self._parse_string() 5528 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5529 5530 return self.expression( 5531 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5532 ) 5533 5534 expressions = None 5535 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5536 self._match_l_paren() 5537 expressions = self._parse_csv(_parse_open_json_column_def) 5538 5539 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5540 5541 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5542 args = self._parse_csv(self._parse_bitwise) 5543 5544 if self._match(TokenType.IN): 5545 return self.expression( 5546 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5547 ) 5548 5549 if haystack_first: 5550 haystack = seq_get(args, 0) 5551 needle = seq_get(args, 1) 5552 else: 5553 needle = seq_get(args, 0) 5554 haystack = seq_get(args, 1) 5555 5556 return self.expression( 5557 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5558 ) 5559 5560 def _parse_predict(self) -> exp.Predict: 5561 self._match_text_seq("MODEL") 5562 this = self._parse_table() 5563 5564 self._match(TokenType.COMMA) 5565 self._match_text_seq("TABLE") 5566 5567 return self.expression( 5568 exp.Predict, 5569 this=this, 5570 expression=self._parse_table(), 5571 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5572 ) 5573 5574 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5575 args = self._parse_csv(self._parse_table) 5576 return exp.JoinHint(this=func_name.upper(), expressions=args) 5577 5578 def _parse_substring(self) -> exp.Substring: 5579 # Postgres supports the form: substring(string [from int] [for int]) 5580 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5581 5582 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5583 5584 if self._match(TokenType.FROM): 5585 args.append(self._parse_bitwise()) 5586 if self._match(TokenType.FOR): 5587 if len(args) == 1: 5588 args.append(exp.Literal.number(1)) 5589 args.append(self._parse_bitwise()) 5590 5591 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5592 5593 def _parse_trim(self) -> exp.Trim: 5594 # https://www.w3resource.com/sql/character-functions/trim.php 5595 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5596 5597 position = None 5598 collation = None 5599 expression = None 5600 5601 if self._match_texts(self.TRIM_TYPES): 5602 position = self._prev.text.upper() 5603 5604 this = self._parse_bitwise() 5605 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5606 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5607 expression = self._parse_bitwise() 5608 5609 if invert_order: 5610 this, expression = expression, this 5611 5612 if self._match(TokenType.COLLATE): 5613 collation = self._parse_bitwise() 5614 5615 return self.expression( 5616 exp.Trim, this=this, position=position, expression=expression, collation=collation 5617 ) 5618 5619 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5620 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5621 5622 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5623 return self._parse_window(self._parse_id_var(), alias=True) 5624 5625 def _parse_respect_or_ignore_nulls( 5626 self, this: t.Optional[exp.Expression] 5627 ) -> t.Optional[exp.Expression]: 5628 if self._match_text_seq("IGNORE", "NULLS"): 
5629 return self.expression(exp.IgnoreNulls, this=this) 5630 if self._match_text_seq("RESPECT", "NULLS"): 5631 return self.expression(exp.RespectNulls, this=this) 5632 return this 5633 5634 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5635 if self._match(TokenType.HAVING): 5636 self._match_texts(("MAX", "MIN")) 5637 max = self._prev.text.upper() != "MIN" 5638 return self.expression( 5639 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5640 ) 5641 5642 return this 5643 5644 def _parse_window( 5645 self, this: t.Optional[exp.Expression], alias: bool = False 5646 ) -> t.Optional[exp.Expression]: 5647 func = this 5648 comments = func.comments if isinstance(func, exp.Expression) else None 5649 5650 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5651 self._match(TokenType.WHERE) 5652 this = self.expression( 5653 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5654 ) 5655 self._match_r_paren() 5656 5657 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5658 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5659 if self._match_text_seq("WITHIN", "GROUP"): 5660 order = self._parse_wrapped(self._parse_order) 5661 this = self.expression(exp.WithinGroup, this=this, expression=order) 5662 5663 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5664 # Some dialects choose to implement and some do not. 5665 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5666 5667 # There is some code above in _parse_lambda that handles 5668 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5669 5670 # The below changes handle 5671 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5672 5673 # Oracle allows both formats 5674 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5675 # and Snowflake chose to do the same for familiarity 5676 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5677 if isinstance(this, exp.AggFunc): 5678 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5679 5680 if ignore_respect and ignore_respect is not this: 5681 ignore_respect.replace(ignore_respect.this) 5682 this = self.expression(ignore_respect.__class__, this=this) 5683 5684 this = self._parse_respect_or_ignore_nulls(this) 5685 5686 # bigquery select from window x AS (partition by ...) 
5687 if alias: 5688 over = None 5689 self._match(TokenType.ALIAS) 5690 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5691 return this 5692 else: 5693 over = self._prev.text.upper() 5694 5695 if comments and isinstance(func, exp.Expression): 5696 func.pop_comments() 5697 5698 if not self._match(TokenType.L_PAREN): 5699 return self.expression( 5700 exp.Window, 5701 comments=comments, 5702 this=this, 5703 alias=self._parse_id_var(False), 5704 over=over, 5705 ) 5706 5707 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5708 5709 first = self._match(TokenType.FIRST) 5710 if self._match_text_seq("LAST"): 5711 first = False 5712 5713 partition, order = self._parse_partition_and_order() 5714 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5715 5716 if kind: 5717 self._match(TokenType.BETWEEN) 5718 start = self._parse_window_spec() 5719 self._match(TokenType.AND) 5720 end = self._parse_window_spec() 5721 5722 spec = self.expression( 5723 exp.WindowSpec, 5724 kind=kind, 5725 start=start["value"], 5726 start_side=start["side"], 5727 end=end["value"], 5728 end_side=end["side"], 5729 ) 5730 else: 5731 spec = None 5732 5733 self._match_r_paren() 5734 5735 window = self.expression( 5736 exp.Window, 5737 comments=comments, 5738 this=this, 5739 partition_by=partition, 5740 order=order, 5741 spec=spec, 5742 alias=window_alias, 5743 over=over, 5744 first=first, 5745 ) 5746 5747 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5748 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5749 return self._parse_window(window, alias=alias) 5750 5751 return window 5752 5753 def _parse_partition_and_order( 5754 self, 5755 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5756 return self._parse_partition_by(), self._parse_order() 5757 5758 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5759 self._match(TokenType.BETWEEN) 5760 5761 return { 5762 "value": ( 5763 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5764 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5765 or self._parse_bitwise() 5766 ), 5767 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5768 } 5769 5770 def _parse_alias( 5771 self, this: t.Optional[exp.Expression], explicit: bool = False 5772 ) -> t.Optional[exp.Expression]: 5773 any_token = self._match(TokenType.ALIAS) 5774 comments = self._prev_comments or [] 5775 5776 if explicit and not any_token: 5777 return this 5778 5779 if self._match(TokenType.L_PAREN): 5780 aliases = self.expression( 5781 exp.Aliases, 5782 comments=comments, 5783 this=this, 5784 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5785 ) 5786 self._match_r_paren(aliases) 5787 return aliases 5788 5789 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 5790 self.STRING_ALIASES and self._parse_string_as_identifier() 5791 ) 5792 5793 if alias: 5794 comments.extend(alias.pop_comments()) 5795 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5796 column = this.this 5797 5798 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5799 if not this.comments and column and column.comments: 5800 this.comments = column.pop_comments() 5801 5802 return this 5803 5804 def _parse_id_var( 5805 self, 5806 any_token: bool = True, 5807 tokens: t.Optional[t.Collection[TokenType]] = None, 5808 ) -> t.Optional[exp.Expression]: 5809 expression = self._parse_identifier() 5810 if 
not expression and ( 5811 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 5812 ): 5813 quoted = self._prev.token_type == TokenType.STRING 5814 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 5815 5816 return expression 5817 5818 def _parse_string(self) -> t.Optional[exp.Expression]: 5819 if self._match_set(self.STRING_PARSERS): 5820 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5821 return self._parse_placeholder() 5822 5823 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5824 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5825 5826 def _parse_number(self) -> t.Optional[exp.Expression]: 5827 if self._match_set(self.NUMERIC_PARSERS): 5828 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5829 return self._parse_placeholder() 5830 5831 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5832 if self._match(TokenType.IDENTIFIER): 5833 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5834 return self._parse_placeholder() 5835 5836 def _parse_var( 5837 self, 5838 any_token: bool = False, 5839 tokens: t.Optional[t.Collection[TokenType]] = None, 5840 upper: bool = False, 5841 ) -> t.Optional[exp.Expression]: 5842 if ( 5843 (any_token and self._advance_any()) 5844 or self._match(TokenType.VAR) 5845 or (self._match_set(tokens) if tokens else False) 5846 ): 5847 return self.expression( 5848 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5849 ) 5850 return self._parse_placeholder() 5851 5852 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5853 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5854 self._advance() 5855 return self._prev 5856 return None 5857 5858 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5859 return self._parse_var() or self._parse_string() 5860 5861 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5862 return self._parse_primary() or self._parse_var(any_token=True) 5863 5864 def _parse_null(self) -> t.Optional[exp.Expression]: 5865 if self._match_set(self.NULL_TOKENS): 5866 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5867 return self._parse_placeholder() 5868 5869 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5870 if self._match(TokenType.TRUE): 5871 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5872 if self._match(TokenType.FALSE): 5873 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5874 return self._parse_placeholder() 5875 5876 def _parse_star(self) -> t.Optional[exp.Expression]: 5877 if self._match(TokenType.STAR): 5878 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5879 return self._parse_placeholder() 5880 5881 def _parse_parameter(self) -> exp.Parameter: 5882 this = self._parse_identifier() or self._parse_primary_or_var() 5883 return self.expression(exp.Parameter, this=this) 5884 5885 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5886 if self._match_set(self.PLACEHOLDER_PARSERS): 5887 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5888 if placeholder: 5889 return placeholder 5890 self._advance(-1) 5891 return None 5892 5893 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 5894 if not self._match_texts(keywords): 5895 return None 5896 if self._match(TokenType.L_PAREN, advance=False): 5897 return 
self._parse_wrapped_csv(self._parse_expression) 5898 5899 expression = self._parse_expression() 5900 return [expression] if expression else None 5901 5902 def _parse_csv( 5903 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5904 ) -> t.List[exp.Expression]: 5905 parse_result = parse_method() 5906 items = [parse_result] if parse_result is not None else [] 5907 5908 while self._match(sep): 5909 self._add_comments(parse_result) 5910 parse_result = parse_method() 5911 if parse_result is not None: 5912 items.append(parse_result) 5913 5914 return items 5915 5916 def _parse_tokens( 5917 self, parse_method: t.Callable, expressions: t.Dict 5918 ) -> t.Optional[exp.Expression]: 5919 this = parse_method() 5920 5921 while self._match_set(expressions): 5922 this = self.expression( 5923 expressions[self._prev.token_type], 5924 this=this, 5925 comments=self._prev_comments, 5926 expression=parse_method(), 5927 ) 5928 5929 return this 5930 5931 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5932 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5933 5934 def _parse_wrapped_csv( 5935 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5936 ) -> t.List[exp.Expression]: 5937 return self._parse_wrapped( 5938 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5939 ) 5940 5941 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5942 wrapped = self._match(TokenType.L_PAREN) 5943 if not wrapped and not optional: 5944 self.raise_error("Expecting (") 5945 parse_result = parse_method() 5946 if wrapped: 5947 self._match_r_paren() 5948 return parse_result 5949 5950 def _parse_expressions(self) -> t.List[exp.Expression]: 5951 return self._parse_csv(self._parse_expression) 5952 5953 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5954 return self._parse_select() or self._parse_set_operations( 5955 self._parse_expression() if alias else self._parse_conjunction() 5956 ) 5957 5958 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5959 return self._parse_query_modifiers( 5960 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5961 ) 5962 5963 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5964 this = None 5965 if self._match_texts(self.TRANSACTION_KIND): 5966 this = self._prev.text 5967 5968 self._match_texts(("TRANSACTION", "WORK")) 5969 5970 modes = [] 5971 while True: 5972 mode = [] 5973 while self._match(TokenType.VAR): 5974 mode.append(self._prev.text) 5975 5976 if mode: 5977 modes.append(" ".join(mode)) 5978 if not self._match(TokenType.COMMA): 5979 break 5980 5981 return self.expression(exp.Transaction, this=this, modes=modes) 5982 5983 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5984 chain = None 5985 savepoint = None 5986 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5987 5988 self._match_texts(("TRANSACTION", "WORK")) 5989 5990 if self._match_text_seq("TO"): 5991 self._match_text_seq("SAVEPOINT") 5992 savepoint = self._parse_id_var() 5993 5994 if self._match(TokenType.AND): 5995 chain = not self._match_text_seq("NO") 5996 self._match_text_seq("CHAIN") 5997 5998 if is_rollback: 5999 return self.expression(exp.Rollback, savepoint=savepoint) 6000 6001 return self.expression(exp.Commit, chain=chain) 6002 6003 def _parse_refresh(self) -> exp.Refresh: 6004 self._match(TokenType.TABLE) 6005 return self.expression(exp.Refresh, 
this=self._parse_string() or self._parse_table()) 6006 6007 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6008 if not self._match_text_seq("ADD"): 6009 return None 6010 6011 self._match(TokenType.COLUMN) 6012 exists_column = self._parse_exists(not_=True) 6013 expression = self._parse_field_def() 6014 6015 if expression: 6016 expression.set("exists", exists_column) 6017 6018 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6019 if self._match_texts(("FIRST", "AFTER")): 6020 position = self._prev.text 6021 column_position = self.expression( 6022 exp.ColumnPosition, this=self._parse_column(), position=position 6023 ) 6024 expression.set("position", column_position) 6025 6026 return expression 6027 6028 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6029 drop = self._match(TokenType.DROP) and self._parse_drop() 6030 if drop and not isinstance(drop, exp.Command): 6031 drop.set("kind", drop.args.get("kind", "COLUMN")) 6032 return drop 6033 6034 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6035 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6036 return self.expression( 6037 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6038 ) 6039 6040 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6041 index = self._index - 1 6042 6043 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6044 return self._parse_csv( 6045 lambda: self.expression( 6046 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6047 ) 6048 ) 6049 6050 self._retreat(index) 6051 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6052 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6053 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6054 6055 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6056 if self._match_texts(self.ALTER_ALTER_PARSERS): 6057 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6058 6059 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6060 # keyword after ALTER we default to parsing this statement 6061 self._match(TokenType.COLUMN) 6062 column = self._parse_field(any_token=True) 6063 6064 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6065 return self.expression(exp.AlterColumn, this=column, drop=True) 6066 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6067 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 6068 if self._match(TokenType.COMMENT): 6069 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6070 if self._match_text_seq("DROP", "NOT", "NULL"): 6071 return self.expression( 6072 exp.AlterColumn, 6073 this=column, 6074 drop=True, 6075 allow_null=True, 6076 ) 6077 if self._match_text_seq("SET", "NOT", "NULL"): 6078 return self.expression( 6079 exp.AlterColumn, 6080 this=column, 6081 allow_null=False, 6082 ) 6083 self._match_text_seq("SET", "DATA") 6084 self._match_text_seq("TYPE") 6085 return self.expression( 6086 exp.AlterColumn, 6087 this=column, 6088 dtype=self._parse_types(), 6089 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6090 using=self._match(TokenType.USING) and self._parse_conjunction(), 6091 ) 6092 6093 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6094 if self._match_texts(("ALL", "EVEN", "AUTO")): 6095 return 
self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6096 6097 self._match_text_seq("KEY", "DISTKEY") 6098 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6099 6100 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6101 if compound: 6102 self._match_text_seq("SORTKEY") 6103 6104 if self._match(TokenType.L_PAREN, advance=False): 6105 return self.expression( 6106 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6107 ) 6108 6109 self._match_texts(("AUTO", "NONE")) 6110 return self.expression( 6111 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6112 ) 6113 6114 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6115 index = self._index - 1 6116 6117 partition_exists = self._parse_exists() 6118 if self._match(TokenType.PARTITION, advance=False): 6119 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6120 6121 self._retreat(index) 6122 return self._parse_csv(self._parse_drop_column) 6123 6124 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 6125 if self._match(TokenType.COLUMN): 6126 exists = self._parse_exists() 6127 old_column = self._parse_column() 6128 to = self._match_text_seq("TO") 6129 new_column = self._parse_column() 6130 6131 if old_column is None or to is None or new_column is None: 6132 return None 6133 6134 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6135 6136 self._match_text_seq("TO") 6137 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 6138 6139 def _parse_alter_table_set(self) -> exp.AlterSet: 6140 alter_set = self.expression(exp.AlterSet) 6141 6142 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6143 "TABLE", "PROPERTIES" 6144 ): 6145 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_conjunction)) 6146 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6147 alter_set.set("expressions", [self._parse_conjunction()]) 6148 elif self._match_texts(("LOGGED", "UNLOGGED")): 6149 alter_set.set("option", exp.var(self._prev.text.upper())) 6150 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6151 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6152 elif self._match_text_seq("LOCATION"): 6153 alter_set.set("location", self._parse_field()) 6154 elif self._match_text_seq("ACCESS", "METHOD"): 6155 alter_set.set("access_method", self._parse_field()) 6156 elif self._match_text_seq("TABLESPACE"): 6157 alter_set.set("tablespace", self._parse_field()) 6158 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6159 alter_set.set("file_format", [self._parse_field()]) 6160 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6161 alter_set.set("file_format", self._parse_wrapped_options()) 6162 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6163 alter_set.set("copy_options", self._parse_wrapped_options()) 6164 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6165 alter_set.set("tag", self._parse_csv(self._parse_conjunction)) 6166 else: 6167 if self._match_text_seq("SERDE"): 6168 alter_set.set("serde", self._parse_field()) 6169 6170 alter_set.set("expressions", [self._parse_properties()]) 6171 6172 return alter_set 6173 6174 def _parse_alter(self) -> exp.AlterTable | exp.Command: 6175 start = self._prev 6176 6177 if not self._match(TokenType.TABLE): 6178 return 
self._parse_as_command(start) 6179 6180 exists = self._parse_exists() 6181 only = self._match_text_seq("ONLY") 6182 this = self._parse_table(schema=True) 6183 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6184 6185 if self._next: 6186 self._advance() 6187 6188 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6189 if parser: 6190 actions = ensure_list(parser(self)) 6191 options = self._parse_csv(self._parse_property) 6192 6193 if not self._curr and actions: 6194 return self.expression( 6195 exp.AlterTable, 6196 this=this, 6197 exists=exists, 6198 actions=actions, 6199 only=only, 6200 options=options, 6201 cluster=cluster, 6202 ) 6203 6204 return self._parse_as_command(start) 6205 6206 def _parse_merge(self) -> exp.Merge: 6207 self._match(TokenType.INTO) 6208 target = self._parse_table() 6209 6210 if target and self._match(TokenType.ALIAS, advance=False): 6211 target.set("alias", self._parse_table_alias()) 6212 6213 self._match(TokenType.USING) 6214 using = self._parse_table() 6215 6216 self._match(TokenType.ON) 6217 on = self._parse_conjunction() 6218 6219 return self.expression( 6220 exp.Merge, 6221 this=target, 6222 using=using, 6223 on=on, 6224 expressions=self._parse_when_matched(), 6225 ) 6226 6227 def _parse_when_matched(self) -> t.List[exp.When]: 6228 whens = [] 6229 6230 while self._match(TokenType.WHEN): 6231 matched = not self._match(TokenType.NOT) 6232 self._match_text_seq("MATCHED") 6233 source = ( 6234 False 6235 if self._match_text_seq("BY", "TARGET") 6236 else self._match_text_seq("BY", "SOURCE") 6237 ) 6238 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 6239 6240 self._match(TokenType.THEN) 6241 6242 if self._match(TokenType.INSERT): 6243 _this = self._parse_star() 6244 if _this: 6245 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 6246 else: 6247 then = self.expression( 6248 exp.Insert, 6249 this=self._parse_value(), 6250 expression=self._match_text_seq("VALUES") and self._parse_value(), 6251 ) 6252 elif self._match(TokenType.UPDATE): 6253 expressions = self._parse_star() 6254 if expressions: 6255 then = self.expression(exp.Update, expressions=expressions) 6256 else: 6257 then = self.expression( 6258 exp.Update, 6259 expressions=self._match(TokenType.SET) 6260 and self._parse_csv(self._parse_equality), 6261 ) 6262 elif self._match(TokenType.DELETE): 6263 then = self.expression(exp.Var, this=self._prev.text) 6264 else: 6265 then = None 6266 6267 whens.append( 6268 self.expression( 6269 exp.When, 6270 matched=matched, 6271 source=source, 6272 condition=condition, 6273 then=then, 6274 ) 6275 ) 6276 return whens 6277 6278 def _parse_show(self) -> t.Optional[exp.Expression]: 6279 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6280 if parser: 6281 return parser(self) 6282 return self._parse_as_command(self._prev) 6283 6284 def _parse_set_item_assignment( 6285 self, kind: t.Optional[str] = None 6286 ) -> t.Optional[exp.Expression]: 6287 index = self._index 6288 6289 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6290 return self._parse_set_transaction(global_=kind == "GLOBAL") 6291 6292 left = self._parse_primary() or self._parse_column() 6293 assignment_delimiter = self._match_texts(("=", "TO")) 6294 6295 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6296 self._retreat(index) 6297 return None 6298 6299 right = self._parse_statement() or self._parse_id_var() 6300 if isinstance(right, 
(exp.Column, exp.Identifier)): 6301 right = exp.var(right.name) 6302 6303 this = self.expression(exp.EQ, this=left, expression=right) 6304 return self.expression(exp.SetItem, this=this, kind=kind) 6305 6306 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6307 self._match_text_seq("TRANSACTION") 6308 characteristics = self._parse_csv( 6309 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6310 ) 6311 return self.expression( 6312 exp.SetItem, 6313 expressions=characteristics, 6314 kind="TRANSACTION", 6315 **{"global": global_}, # type: ignore 6316 ) 6317 6318 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6319 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6320 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6321 6322 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6323 index = self._index 6324 set_ = self.expression( 6325 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6326 ) 6327 6328 if self._curr: 6329 self._retreat(index) 6330 return self._parse_as_command(self._prev) 6331 6332 return set_ 6333 6334 def _parse_var_from_options( 6335 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6336 ) -> t.Optional[exp.Var]: 6337 start = self._curr 6338 if not start: 6339 return None 6340 6341 option = start.text.upper() 6342 continuations = options.get(option) 6343 6344 index = self._index 6345 self._advance() 6346 for keywords in continuations or []: 6347 if isinstance(keywords, str): 6348 keywords = (keywords,) 6349 6350 if self._match_text_seq(*keywords): 6351 option = f"{option} {' '.join(keywords)}" 6352 break 6353 else: 6354 if continuations or continuations is None: 6355 if raise_unmatched: 6356 self.raise_error(f"Unknown option {option}") 6357 6358 self._retreat(index) 6359 return None 6360 6361 return exp.var(option) 6362 6363 def _parse_as_command(self, start: Token) -> exp.Command: 6364 while self._curr: 6365 self._advance() 6366 text = self._find_sql(start, self._prev) 6367 size = len(start.text) 6368 self._warn_unsupported() 6369 return exp.Command(this=text[:size], expression=text[size:]) 6370 6371 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6372 settings = [] 6373 6374 self._match_l_paren() 6375 kind = self._parse_id_var() 6376 6377 if self._match(TokenType.L_PAREN): 6378 while True: 6379 key = self._parse_id_var() 6380 value = self._parse_primary() 6381 6382 if not key and value is None: 6383 break 6384 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6385 self._match(TokenType.R_PAREN) 6386 6387 self._match_r_paren() 6388 6389 return self.expression( 6390 exp.DictProperty, 6391 this=this, 6392 kind=kind.this if kind else None, 6393 settings=settings, 6394 ) 6395 6396 def _parse_dict_range(self, this: str) -> exp.DictRange: 6397 self._match_l_paren() 6398 has_min = self._match_text_seq("MIN") 6399 if has_min: 6400 min = self._parse_var() or self._parse_primary() 6401 self._match_text_seq("MAX") 6402 max = self._parse_var() or self._parse_primary() 6403 else: 6404 max = self._parse_var() or self._parse_primary() 6405 min = exp.Literal.number(0) 6406 self._match_r_paren() 6407 return self.expression(exp.DictRange, this=this, min=min, max=max) 6408 6409 def _parse_comprehension( 6410 self, this: t.Optional[exp.Expression] 6411 ) -> t.Optional[exp.Comprehension]: 6412 index = self._index 6413 expression = self._parse_column() 6414 if not 
self._match(TokenType.IN): 6415 self._retreat(index - 1) 6416 return None 6417 iterator = self._parse_column() 6418 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 6419 return self.expression( 6420 exp.Comprehension, 6421 this=this, 6422 expression=expression, 6423 iterator=iterator, 6424 condition=condition, 6425 ) 6426 6427 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6428 if self._match(TokenType.HEREDOC_STRING): 6429 return self.expression(exp.Heredoc, this=self._prev.text) 6430 6431 if not self._match_text_seq("$"): 6432 return None 6433 6434 tags = ["$"] 6435 tag_text = None 6436 6437 if self._is_connected(): 6438 self._advance() 6439 tags.append(self._prev.text.upper()) 6440 else: 6441 self.raise_error("No closing $ found") 6442 6443 if tags[-1] != "$": 6444 if self._is_connected() and self._match_text_seq("$"): 6445 tag_text = tags[-1] 6446 tags.append("$") 6447 else: 6448 self.raise_error("No closing $ found") 6449 6450 heredoc_start = self._curr 6451 6452 while self._curr: 6453 if self._match_text_seq(*tags, advance=False): 6454 this = self._find_sql(heredoc_start, self._prev) 6455 self._advance(len(tags)) 6456 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6457 6458 self._advance() 6459 6460 self.raise_error(f"No closing {''.join(tags)} found") 6461 return None 6462 6463 def _find_parser( 6464 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6465 ) -> t.Optional[t.Callable]: 6466 if not self._curr: 6467 return None 6468 6469 index = self._index 6470 this = [] 6471 while True: 6472 # The current token might be multiple words 6473 curr = self._curr.text.upper() 6474 key = curr.split(" ") 6475 this.append(curr) 6476 6477 self._advance() 6478 result, trie = in_trie(trie, key) 6479 if result == TrieResult.FAILED: 6480 break 6481 6482 if result == TrieResult.EXISTS: 6483 subparser = parsers[" ".join(this)] 6484 return subparser 6485 6486 self._retreat(index) 6487 return None 6488 6489 def _match(self, token_type, advance=True, expression=None): 6490 if not self._curr: 6491 return None 6492 6493 if self._curr.token_type == token_type: 6494 if advance: 6495 self._advance() 6496 self._add_comments(expression) 6497 return True 6498 6499 return None 6500 6501 def _match_set(self, types, advance=True): 6502 if not self._curr: 6503 return None 6504 6505 if self._curr.token_type in types: 6506 if advance: 6507 self._advance() 6508 return True 6509 6510 return None 6511 6512 def _match_pair(self, token_type_a, token_type_b, advance=True): 6513 if not self._curr or not self._next: 6514 return None 6515 6516 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6517 if advance: 6518 self._advance(2) 6519 return True 6520 6521 return None 6522 6523 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6524 if not self._match(TokenType.L_PAREN, expression=expression): 6525 self.raise_error("Expecting (") 6526 6527 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6528 if not self._match(TokenType.R_PAREN, expression=expression): 6529 self.raise_error("Expecting )") 6530 6531 def _match_texts(self, texts, advance=True): 6532 if self._curr and self._curr.text.upper() in texts: 6533 if advance: 6534 self._advance() 6535 return True 6536 return None 6537 6538 def _match_text_seq(self, *texts, advance=True): 6539 index = self._index 6540 for text in texts: 6541 if self._curr and self._curr.text.upper() == text: 6542 self._advance() 6543 else: 6544 
self._retreat(index) 6545 return None 6546 6547 if not advance: 6548 self._retreat(index) 6549 6550 return True 6551 6552 def _replace_lambda( 6553 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 6554 ) -> t.Optional[exp.Expression]: 6555 if not node: 6556 return node 6557 6558 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 6559 6560 for column in node.find_all(exp.Column): 6561 typ = lambda_types.get(column.parts[0].name) 6562 if typ is not None: 6563 dot_or_id = column.to_dot() if column.table else column.this 6564 6565 if typ: 6566 dot_or_id = self.expression( 6567 exp.Cast, 6568 this=dot_or_id, 6569 to=typ, 6570 ) 6571 6572 parent = column.parent 6573 6574 while isinstance(parent, exp.Dot): 6575 if not isinstance(parent.parent, exp.Dot): 6576 parent.replace(dot_or_id) 6577 break 6578 parent = parent.parent 6579 else: 6580 if column is node: 6581 node = dot_or_id 6582 else: 6583 column.replace(dot_or_id) 6584 return node 6585 6586 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6587 start = self._prev 6588 6589 # Not to be confused with TRUNCATE(number, decimals) function call 6590 if self._match(TokenType.L_PAREN): 6591 self._retreat(self._index - 2) 6592 return self._parse_function() 6593 6594 # Clickhouse supports TRUNCATE DATABASE as well 6595 is_database = self._match(TokenType.DATABASE) 6596 6597 self._match(TokenType.TABLE) 6598 6599 exists = self._parse_exists(not_=False) 6600 6601 expressions = self._parse_csv( 6602 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6603 ) 6604 6605 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6606 6607 if self._match_text_seq("RESTART", "IDENTITY"): 6608 identity = "RESTART" 6609 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6610 identity = "CONTINUE" 6611 else: 6612 identity = None 6613 6614 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6615 option = self._prev.text 6616 else: 6617 option = None 6618 6619 partition = self._parse_partition() 6620 6621 # Fallback case 6622 if self._curr: 6623 return self._parse_as_command(start) 6624 6625 return self.expression( 6626 exp.TruncateTable, 6627 expressions=expressions, 6628 is_database=is_database, 6629 exists=exists, 6630 cluster=cluster, 6631 identity=identity, 6632 option=option, 6633 partition=partition, 6634 ) 6635 6636 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6637 this = self._parse_ordered(self._parse_opclass) 6638 6639 if not self._match(TokenType.WITH): 6640 return this 6641 6642 op = self._parse_var(any_token=True) 6643 6644 return self.expression(exp.WithOperator, this=this, op=op) 6645 6646 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 6647 opts = [] 6648 self._match(TokenType.EQ) 6649 self._match(TokenType.L_PAREN) 6650 while self._curr and not self._match(TokenType.R_PAREN): 6651 opts.append(self._parse_conjunction()) 6652 self._match(TokenType.COMMA) 6653 return opts 6654 6655 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 6656 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 6657 6658 options = [] 6659 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 6660 option = self._parse_unquoted_field() 6661 value = None 6662 6663 # Some options are defined as functions with the values as params 6664 if not isinstance(option, exp.Func): 6665 prev = self._prev.text.upper() 6666 # Different dialects might separate options and 
values by white space, "=" and "AS" 6667 self._match(TokenType.EQ) 6668 self._match(TokenType.ALIAS) 6669 6670 if prev == "FILE_FORMAT" and self._match(TokenType.L_PAREN): 6671 # Snowflake FILE_FORMAT case 6672 value = self._parse_wrapped_options() 6673 else: 6674 value = self._parse_unquoted_field() 6675 6676 param = self.expression(exp.CopyParameter, this=option, expression=value) 6677 options.append(param) 6678 6679 if sep: 6680 self._match(sep) 6681 6682 return options 6683 6684 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 6685 expr = self.expression(exp.Credentials) 6686 6687 if self._match_text_seq("STORAGE_INTEGRATION", advance=False): 6688 expr.set("storage", self._parse_conjunction()) 6689 if self._match_text_seq("CREDENTIALS"): 6690 # Snowflake supports CREDENTIALS = (...), while Redshift CREDENTIALS <string> 6691 creds = ( 6692 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 6693 ) 6694 expr.set("credentials", creds) 6695 if self._match_text_seq("ENCRYPTION"): 6696 expr.set("encryption", self._parse_wrapped_options()) 6697 if self._match_text_seq("IAM_ROLE"): 6698 expr.set("iam_role", self._parse_field()) 6699 if self._match_text_seq("REGION"): 6700 expr.set("region", self._parse_field()) 6701 6702 return expr 6703 6704 def _parse_file_location(self) -> t.Optional[exp.Expression]: 6705 return self._parse_field() 6706 6707 def _parse_copy(self) -> exp.Copy | exp.Command: 6708 start = self._prev 6709 6710 self._match(TokenType.INTO) 6711 6712 this = ( 6713 self._parse_conjunction() 6714 if self._match(TokenType.L_PAREN, advance=False) 6715 else self._parse_table(schema=True) 6716 ) 6717 6718 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 6719 6720 files = self._parse_csv(self._parse_file_location) 6721 credentials = self._parse_credentials() 6722 6723 self._match_text_seq("WITH") 6724 6725 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 6726 6727 # Fallback case 6728 if self._curr: 6729 return self._parse_as_command(start) 6730 6731 return self.expression( 6732 exp.Copy, 6733 this=this, 6734 kind=kind, 6735 credentials=credentials, 6736 files=files, 6737 params=params, 6738 )
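The parsing helpers above are rarely called directly; they are reached through sqlglot's public API. A minimal sketch, assuming a recent sqlglot version in which DECODE is registered in FUNCTION_PARSERS and the positional TRIM form is supported by the base dialect:

import sqlglot
from sqlglot import exp

# _parse_decode rewrites DECODE(...) into a searched CASE expression.
ast = sqlglot.parse_one("SELECT DECODE(a, 1, 'one', 'other') FROM t")
print(ast.find(exp.Case).sql())  # e.g. CASE WHEN a = 1 THEN 'one' ELSE 'other' END

# _parse_trim handles the TRIM([LEADING | TRAILING | BOTH] ... FROM ...) form.
print(sqlglot.parse_one("SELECT TRIM(LEADING 'x' FROM col) FROM t").sql())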
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
1208 def __init__( 1209 self, 1210 error_level: t.Optional[ErrorLevel] = None, 1211 error_message_context: int = 100, 1212 max_errors: int = 3, 1213 dialect: DialectType = None, 1214 ): 1215 from sqlglot.dialects import Dialect 1216 1217 self.error_level = error_level or ErrorLevel.IMMEDIATE 1218 self.error_message_context = error_message_context 1219 self.max_errors = max_errors 1220 self.dialect = Dialect.get_or_raise(dialect) 1221 self.reset()
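A minimal construction sketch; the dialect argument accepts anything Dialect.get_or_raise understands, such as a dialect name string:

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

# Collect up to five errors and raise them together, instead of failing
# on the first one as ErrorLevel.IMMEDIATE (the default) would.
parser = Parser(error_level=ErrorLevel.RAISE, max_errors=5, dialect="duckdb")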
1233 def parse( 1234 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1235 ) -> t.List[t.Optional[exp.Expression]]: 1236 """ 1237 Parses a list of tokens and returns a list of syntax trees, one tree 1238 per parsed SQL statement. 1239 1240 Args: 1241 raw_tokens: The list of tokens. 1242 sql: The original SQL string, used to produce helpful debug messages. 1243 1244 Returns: 1245 The list of the produced syntax trees. 1246 """ 1247 return self._parse( 1248 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1249 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of produced syntax trees.
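A minimal usage sketch with the base Tokenizer and Parser; each top-level statement yields one tree:

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM t; SELECT b FROM u"
tokens = Tokenizer().tokenize(sql)
trees = Parser().parse(tokens, sql=sql)
print([tree.sql() for tree in trees])  # one tree per statement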
1251 def parse_into( 1252 self, 1253 expression_types: exp.IntoType, 1254 raw_tokens: t.List[Token], 1255 sql: t.Optional[str] = None, 1256 ) -> t.List[t.Optional[exp.Expression]]: 1257 """ 1258 Parses a list of tokens into a given Expression type. If a collection of Expression 1259 types is given instead, this method will try to parse the token list into each one 1260 of them, stopping at the first for which the parsing succeeds. 1261 1262 Args: 1263 expression_types: The expression type(s) to try and parse the token list into. 1264 raw_tokens: The list of tokens. 1265 sql: The original SQL string, used to produce helpful debug messages. 1266 1267 Returns: 1268 The target Expression. 1269 """ 1270 errors = [] 1271 for expression_type in ensure_list(expression_types): 1272 parser = self.EXPRESSION_PARSERS.get(expression_type) 1273 if not parser: 1274 raise TypeError(f"No parser registered for {expression_type}") 1275 1276 try: 1277 return self._parse(parser, raw_tokens, sql) 1278 except ParseError as e: 1279 e.errors[0]["into_expression"] = expression_type 1280 errors.append(e) 1281 1282 raise ParseError( 1283 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1284 errors=merge_errors(errors), 1285 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
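A sketch, assuming the target type is registered in the parser's EXPRESSION_PARSERS mapping (as exp.Select is):

from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT 1"
tokens = Tokenizer().tokenize(sql)
# Parse the tokens specifically as a SELECT statement.
select = Parser().parse_into(exp.Select, tokens, sql=sql)[0]
assert isinstance(select, exp.Select)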
1325 def check_errors(self) -> None: 1326 """Logs or raises any found errors, depending on the chosen error level setting.""" 1327 if self.error_level == ErrorLevel.WARN: 1328 for error in self.errors: 1329 logger.error(str(error)) 1330 elif self.error_level == ErrorLevel.RAISE and self.errors: 1331 raise ParseError( 1332 concat_messages(self.errors, self.max_errors), 1333 errors=merge_errors(self.errors), 1334 )
Logs or raises any found errors, depending on the chosen error level setting.
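A sketch of the WARN path, assuming parse() invokes check_errors() at the end of a run; errors are then logged rather than raised, and remain inspectable on parser.errors:

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

parser = Parser(error_level=ErrorLevel.WARN)
sql = "SELECT 1 +"  # dangling operator: a required argument ends up missing
parser.parse(Tokenizer().tokenize(sql), sql=sql)
print(parser.errors)  # recorded ParseError objects, logged via logger.error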
1336 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1337 """ 1338 Appends an error in the list of recorded errors or raises it, depending on the chosen 1339 error level setting. 1340 """ 1341 token = token or self._curr or self._prev or Token.string("") 1342 start = token.start 1343 end = token.end + 1 1344 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1345 highlight = self.sql[start:end] 1346 end_context = self.sql[end : end + self.error_message_context] 1347 1348 error = ParseError.new( 1349 f"{message}. Line {token.line}, Col: {token.col}.\n" 1350 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1351 description=message, 1352 line=token.line, 1353 col=token.col, 1354 start_context=start_context, 1355 highlight=highlight, 1356 end_context=end_context, 1357 ) 1358 1359 if self.error_level == ErrorLevel.IMMEDIATE: 1360 raise error 1361 1362 self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
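The context captured here survives on the raised exception, so callers can inspect structured error metadata. For example:

from sqlglot import parse_one
from sqlglot.errors import ParseError

try:
    parse_one("SELECT 1 +")  # missing right-hand operand
except ParseError as e:
    for error in e.errors:
        print(error["line"], error["col"], error["description"])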
1364 def expression( 1365 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1366 ) -> E: 1367 """ 1368 Creates a new, validated Expression. 1369 1370 Args: 1371 exp_class: The expression class to instantiate. 1372 comments: An optional list of comments to attach to the expression. 1373 kwargs: The arguments to set for the expression along with their respective values. 1374 1375 Returns: 1376 The target expression. 1377 """ 1378 instance = exp_class(**kwargs) 1379 instance.add_comments(comments) if comments else self._add_comments(instance) 1380 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
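A small sketch of building a node the way the parsing helpers above do; with no pending comments and no validation errors, this reduces to instantiation plus validation:

from sqlglot import exp
from sqlglot.parser import Parser

parser = Parser()
node = parser.expression(exp.Alias, this=exp.column("x"), alias=exp.to_identifier("y"))
print(node.sql())  # x AS y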
1387 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1388 """ 1389 Validates an Expression, making sure that all its mandatory arguments are set. 1390 1391 Args: 1392 expression: The expression to validate. 1393 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1394 1395 Returns: 1396 The validated expression. 1397 """ 1398 if self.error_level != ErrorLevel.IGNORE: 1399 for error_message in expression.error_messages(args): 1400 self.raise_error(error_message) 1401 1402 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
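A sketch of the failure path, using exp.EQ, whose 'expression' argument is mandatory; under the default ErrorLevel.IMMEDIATE the missing keyword raises at once:

from sqlglot import exp
from sqlglot.errors import ParseError
from sqlglot.parser import Parser

parser = Parser()
try:
    # exp.EQ requires both 'this' and 'expression'; omitting one fails validation.
    parser.validate_expression(exp.EQ(this=exp.column("x")))
except ParseError as e:
    print(e)  # Required keyword: 'expression' missing for exp.EQ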