# sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E, Lit 16 from sqlglot.dialects.dialect import Dialect, DialectType 17 18 T = t.TypeVar("T") 19 20logger = logging.getLogger("sqlglot") 21 22OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]] 23 24 25def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 26 if len(args) == 1 and args[0].is_star: 27 return exp.StarMap(this=args[0]) 28 29 keys = [] 30 values = [] 31 for i in range(0, len(args), 2): 32 keys.append(args[i]) 33 values.append(args[i + 1]) 34 35 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False)) 36 37 38def build_like(args: t.List) -> exp.Escape | exp.Like: 39 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 40 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 41 42 43def binary_range_parser( 44 expr_type: t.Type[exp.Expression], reverse_args: bool = False 45) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 46 def _parse_binary_range( 47 self: Parser, this: t.Optional[exp.Expression] 48 ) -> t.Optional[exp.Expression]: 49 expression = self._parse_bitwise() 50 if reverse_args: 51 this, expression = expression, this 52 return self._parse_escape(self.expression(expr_type, this=this, expression=expression)) 53 54 return _parse_binary_range 55 56 57def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 58 # Default argument order is base, expression 59 this = seq_get(args, 0) 60 expression = seq_get(args, 1) 61 
62 if expression: 63 if not dialect.LOG_BASE_FIRST: 64 this, expression = expression, this 65 return exp.Log(this=this, expression=expression) 66 67 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this) 68 69 70def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex: 71 arg = seq_get(args, 0) 72 return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg) 73 74 75def build_lower(args: t.List) -> exp.Lower | exp.Hex: 76 # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation 77 arg = seq_get(args, 0) 78 return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg) 79 80 81def build_upper(args: t.List) -> exp.Upper | exp.Hex: 82 # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation 83 arg = seq_get(args, 0) 84 return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg) 85 86 87def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 88 def _builder(args: t.List, dialect: Dialect) -> E: 89 expression = expr_type( 90 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 91 ) 92 if len(args) > 2 and expr_type is exp.JSONExtract: 93 expression.set("expressions", args[2:]) 94 95 return expression 96 97 return _builder 98 99 100def build_mod(args: t.List) -> exp.Mod: 101 this = seq_get(args, 0) 102 expression = seq_get(args, 1) 103 104 # Wrap the operands if they are binary nodes, e.g. 
MOD(a + 1, 7) -> (a + 1) % 7 105 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 106 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 107 108 return exp.Mod(this=this, expression=expression) 109 110 111class _Parser(type): 112 def __new__(cls, clsname, bases, attrs): 113 klass = super().__new__(cls, clsname, bases, attrs) 114 115 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 116 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 117 118 return klass 119 120 121class Parser(metaclass=_Parser): 122 """ 123 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 124 125 Args: 126 error_level: The desired error level. 127 Default: ErrorLevel.IMMEDIATE 128 error_message_context: The amount of context to capture from a query string when displaying 129 the error message (in number of characters). 130 Default: 100 131 max_errors: Maximum number of error messages to include in a raised ParseError. 132 This is only relevant if error_level is ErrorLevel.RAISE. 
133 Default: 3 134 """ 135 136 FUNCTIONS: t.Dict[str, t.Callable] = { 137 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 138 "CONCAT": lambda args, dialect: exp.Concat( 139 expressions=args, 140 safe=not dialect.STRICT_STRING_CONCAT, 141 coalesce=dialect.CONCAT_COALESCE, 142 ), 143 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 144 expressions=args, 145 safe=not dialect.STRICT_STRING_CONCAT, 146 coalesce=dialect.CONCAT_COALESCE, 147 ), 148 "DATE_TO_DATE_STR": lambda args: exp.Cast( 149 this=seq_get(args, 0), 150 to=exp.DataType(this=exp.DataType.Type.TEXT), 151 ), 152 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 153 "HEX": build_hex, 154 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 155 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 156 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 157 "LIKE": build_like, 158 "LOG": build_logarithm, 159 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 160 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 161 "LOWER": build_lower, 162 "MOD": build_mod, 163 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 164 if len(args) != 2 165 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 166 "TIME_TO_TIME_STR": lambda args: exp.Cast( 167 this=seq_get(args, 0), 168 to=exp.DataType(this=exp.DataType.Type.TEXT), 169 ), 170 "TO_HEX": build_hex, 171 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 172 this=exp.Cast( 173 this=seq_get(args, 0), 174 to=exp.DataType(this=exp.DataType.Type.TEXT), 175 ), 176 start=exp.Literal.number(1), 177 length=exp.Literal.number(10), 178 ), 179 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 180 "UPPER": build_upper, 181 "VAR_MAP": build_var_map, 182 } 183 184 NO_PAREN_FUNCTIONS = { 185 
TokenType.CURRENT_DATE: exp.CurrentDate, 186 TokenType.CURRENT_DATETIME: exp.CurrentDate, 187 TokenType.CURRENT_TIME: exp.CurrentTime, 188 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 189 TokenType.CURRENT_USER: exp.CurrentUser, 190 } 191 192 STRUCT_TYPE_TOKENS = { 193 TokenType.NESTED, 194 TokenType.OBJECT, 195 TokenType.STRUCT, 196 } 197 198 NESTED_TYPE_TOKENS = { 199 TokenType.ARRAY, 200 TokenType.LIST, 201 TokenType.LOWCARDINALITY, 202 TokenType.MAP, 203 TokenType.NULLABLE, 204 *STRUCT_TYPE_TOKENS, 205 } 206 207 ENUM_TYPE_TOKENS = { 208 TokenType.ENUM, 209 TokenType.ENUM8, 210 TokenType.ENUM16, 211 } 212 213 AGGREGATE_TYPE_TOKENS = { 214 TokenType.AGGREGATEFUNCTION, 215 TokenType.SIMPLEAGGREGATEFUNCTION, 216 } 217 218 TYPE_TOKENS = { 219 TokenType.BIT, 220 TokenType.BOOLEAN, 221 TokenType.TINYINT, 222 TokenType.UTINYINT, 223 TokenType.SMALLINT, 224 TokenType.USMALLINT, 225 TokenType.INT, 226 TokenType.UINT, 227 TokenType.BIGINT, 228 TokenType.UBIGINT, 229 TokenType.INT128, 230 TokenType.UINT128, 231 TokenType.INT256, 232 TokenType.UINT256, 233 TokenType.MEDIUMINT, 234 TokenType.UMEDIUMINT, 235 TokenType.FIXEDSTRING, 236 TokenType.FLOAT, 237 TokenType.DOUBLE, 238 TokenType.CHAR, 239 TokenType.NCHAR, 240 TokenType.VARCHAR, 241 TokenType.NVARCHAR, 242 TokenType.BPCHAR, 243 TokenType.TEXT, 244 TokenType.MEDIUMTEXT, 245 TokenType.LONGTEXT, 246 TokenType.MEDIUMBLOB, 247 TokenType.LONGBLOB, 248 TokenType.BINARY, 249 TokenType.VARBINARY, 250 TokenType.JSON, 251 TokenType.JSONB, 252 TokenType.INTERVAL, 253 TokenType.TINYBLOB, 254 TokenType.TINYTEXT, 255 TokenType.TIME, 256 TokenType.TIMETZ, 257 TokenType.TIMESTAMP, 258 TokenType.TIMESTAMP_S, 259 TokenType.TIMESTAMP_MS, 260 TokenType.TIMESTAMP_NS, 261 TokenType.TIMESTAMPTZ, 262 TokenType.TIMESTAMPLTZ, 263 TokenType.TIMESTAMPNTZ, 264 TokenType.DATETIME, 265 TokenType.DATETIME64, 266 TokenType.DATE, 267 TokenType.DATE32, 268 TokenType.INT4RANGE, 269 TokenType.INT4MULTIRANGE, 270 TokenType.INT8RANGE, 271 
TokenType.INT8MULTIRANGE, 272 TokenType.NUMRANGE, 273 TokenType.NUMMULTIRANGE, 274 TokenType.TSRANGE, 275 TokenType.TSMULTIRANGE, 276 TokenType.TSTZRANGE, 277 TokenType.TSTZMULTIRANGE, 278 TokenType.DATERANGE, 279 TokenType.DATEMULTIRANGE, 280 TokenType.DECIMAL, 281 TokenType.UDECIMAL, 282 TokenType.BIGDECIMAL, 283 TokenType.UUID, 284 TokenType.GEOGRAPHY, 285 TokenType.GEOMETRY, 286 TokenType.HLLSKETCH, 287 TokenType.HSTORE, 288 TokenType.PSEUDO_TYPE, 289 TokenType.SUPER, 290 TokenType.SERIAL, 291 TokenType.SMALLSERIAL, 292 TokenType.BIGSERIAL, 293 TokenType.XML, 294 TokenType.YEAR, 295 TokenType.UNIQUEIDENTIFIER, 296 TokenType.USERDEFINED, 297 TokenType.MONEY, 298 TokenType.SMALLMONEY, 299 TokenType.ROWVERSION, 300 TokenType.IMAGE, 301 TokenType.VARIANT, 302 TokenType.VECTOR, 303 TokenType.OBJECT, 304 TokenType.OBJECT_IDENTIFIER, 305 TokenType.INET, 306 TokenType.IPADDRESS, 307 TokenType.IPPREFIX, 308 TokenType.IPV4, 309 TokenType.IPV6, 310 TokenType.UNKNOWN, 311 TokenType.NULL, 312 TokenType.NAME, 313 TokenType.TDIGEST, 314 *ENUM_TYPE_TOKENS, 315 *NESTED_TYPE_TOKENS, 316 *AGGREGATE_TYPE_TOKENS, 317 } 318 319 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 320 TokenType.BIGINT: TokenType.UBIGINT, 321 TokenType.INT: TokenType.UINT, 322 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 323 TokenType.SMALLINT: TokenType.USMALLINT, 324 TokenType.TINYINT: TokenType.UTINYINT, 325 TokenType.DECIMAL: TokenType.UDECIMAL, 326 } 327 328 SUBQUERY_PREDICATES = { 329 TokenType.ANY: exp.Any, 330 TokenType.ALL: exp.All, 331 TokenType.EXISTS: exp.Exists, 332 TokenType.SOME: exp.Any, 333 } 334 335 RESERVED_TOKENS = { 336 *Tokenizer.SINGLE_TOKENS.values(), 337 TokenType.SELECT, 338 } - {TokenType.IDENTIFIER} 339 340 DB_CREATABLES = { 341 TokenType.DATABASE, 342 TokenType.DICTIONARY, 343 TokenType.MODEL, 344 TokenType.SCHEMA, 345 TokenType.SEQUENCE, 346 TokenType.STORAGE_INTEGRATION, 347 TokenType.TABLE, 348 TokenType.TAG, 349 TokenType.VIEW, 350 TokenType.WAREHOUSE, 351 TokenType.STREAMLIT, 352 } 353 
354 CREATABLES = { 355 TokenType.COLUMN, 356 TokenType.CONSTRAINT, 357 TokenType.FOREIGN_KEY, 358 TokenType.FUNCTION, 359 TokenType.INDEX, 360 TokenType.PROCEDURE, 361 *DB_CREATABLES, 362 } 363 364 # Tokens that can represent identifiers 365 ID_VAR_TOKENS = { 366 TokenType.VAR, 367 TokenType.ANTI, 368 TokenType.APPLY, 369 TokenType.ASC, 370 TokenType.ASOF, 371 TokenType.AUTO_INCREMENT, 372 TokenType.BEGIN, 373 TokenType.BPCHAR, 374 TokenType.CACHE, 375 TokenType.CASE, 376 TokenType.COLLATE, 377 TokenType.COMMAND, 378 TokenType.COMMENT, 379 TokenType.COMMIT, 380 TokenType.CONSTRAINT, 381 TokenType.COPY, 382 TokenType.DEFAULT, 383 TokenType.DELETE, 384 TokenType.DESC, 385 TokenType.DESCRIBE, 386 TokenType.DICTIONARY, 387 TokenType.DIV, 388 TokenType.END, 389 TokenType.EXECUTE, 390 TokenType.ESCAPE, 391 TokenType.FALSE, 392 TokenType.FIRST, 393 TokenType.FILTER, 394 TokenType.FINAL, 395 TokenType.FORMAT, 396 TokenType.FULL, 397 TokenType.IDENTIFIER, 398 TokenType.IS, 399 TokenType.ISNULL, 400 TokenType.INTERVAL, 401 TokenType.KEEP, 402 TokenType.KILL, 403 TokenType.LEFT, 404 TokenType.LOAD, 405 TokenType.MERGE, 406 TokenType.NATURAL, 407 TokenType.NEXT, 408 TokenType.OFFSET, 409 TokenType.OPERATOR, 410 TokenType.ORDINALITY, 411 TokenType.OVERLAPS, 412 TokenType.OVERWRITE, 413 TokenType.PARTITION, 414 TokenType.PERCENT, 415 TokenType.PIVOT, 416 TokenType.PRAGMA, 417 TokenType.RANGE, 418 TokenType.RECURSIVE, 419 TokenType.REFERENCES, 420 TokenType.REFRESH, 421 TokenType.REPLACE, 422 TokenType.RIGHT, 423 TokenType.ROLLUP, 424 TokenType.ROW, 425 TokenType.ROWS, 426 TokenType.SEMI, 427 TokenType.SET, 428 TokenType.SETTINGS, 429 TokenType.SHOW, 430 TokenType.TEMPORARY, 431 TokenType.TOP, 432 TokenType.TRUE, 433 TokenType.TRUNCATE, 434 TokenType.UNIQUE, 435 TokenType.UNNEST, 436 TokenType.UNPIVOT, 437 TokenType.UPDATE, 438 TokenType.USE, 439 TokenType.VOLATILE, 440 TokenType.WINDOW, 441 *CREATABLES, 442 *SUBQUERY_PREDICATES, 443 *TYPE_TOKENS, 444 *NO_PAREN_FUNCTIONS, 445 } 
446 447 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 448 449 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 450 TokenType.ANTI, 451 TokenType.APPLY, 452 TokenType.ASOF, 453 TokenType.FULL, 454 TokenType.LEFT, 455 TokenType.LOCK, 456 TokenType.NATURAL, 457 TokenType.OFFSET, 458 TokenType.RIGHT, 459 TokenType.SEMI, 460 TokenType.WINDOW, 461 } 462 463 ALIAS_TOKENS = ID_VAR_TOKENS 464 465 ARRAY_CONSTRUCTORS = { 466 "ARRAY": exp.Array, 467 "LIST": exp.List, 468 } 469 470 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 471 472 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 473 474 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 475 476 FUNC_TOKENS = { 477 TokenType.COLLATE, 478 TokenType.COMMAND, 479 TokenType.CURRENT_DATE, 480 TokenType.CURRENT_DATETIME, 481 TokenType.CURRENT_TIMESTAMP, 482 TokenType.CURRENT_TIME, 483 TokenType.CURRENT_USER, 484 TokenType.FILTER, 485 TokenType.FIRST, 486 TokenType.FORMAT, 487 TokenType.GLOB, 488 TokenType.IDENTIFIER, 489 TokenType.INDEX, 490 TokenType.ISNULL, 491 TokenType.ILIKE, 492 TokenType.INSERT, 493 TokenType.LIKE, 494 TokenType.MERGE, 495 TokenType.OFFSET, 496 TokenType.PRIMARY_KEY, 497 TokenType.RANGE, 498 TokenType.REPLACE, 499 TokenType.RLIKE, 500 TokenType.ROW, 501 TokenType.UNNEST, 502 TokenType.VAR, 503 TokenType.LEFT, 504 TokenType.RIGHT, 505 TokenType.SEQUENCE, 506 TokenType.DATE, 507 TokenType.DATETIME, 508 TokenType.TABLE, 509 TokenType.TIMESTAMP, 510 TokenType.TIMESTAMPTZ, 511 TokenType.TRUNCATE, 512 TokenType.WINDOW, 513 TokenType.XOR, 514 *TYPE_TOKENS, 515 *SUBQUERY_PREDICATES, 516 } 517 518 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 519 TokenType.AND: exp.And, 520 } 521 522 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 523 TokenType.COLON_EQ: exp.PropertyEQ, 524 } 525 526 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 527 TokenType.OR: exp.Or, 528 } 529 530 EQUALITY = { 531 TokenType.EQ: exp.EQ, 532 TokenType.NEQ: exp.NEQ, 533 TokenType.NULLSAFE_EQ: 
exp.NullSafeEQ, 534 } 535 536 COMPARISON = { 537 TokenType.GT: exp.GT, 538 TokenType.GTE: exp.GTE, 539 TokenType.LT: exp.LT, 540 TokenType.LTE: exp.LTE, 541 } 542 543 BITWISE = { 544 TokenType.AMP: exp.BitwiseAnd, 545 TokenType.CARET: exp.BitwiseXor, 546 TokenType.PIPE: exp.BitwiseOr, 547 } 548 549 TERM = { 550 TokenType.DASH: exp.Sub, 551 TokenType.PLUS: exp.Add, 552 TokenType.MOD: exp.Mod, 553 TokenType.COLLATE: exp.Collate, 554 } 555 556 FACTOR = { 557 TokenType.DIV: exp.IntDiv, 558 TokenType.LR_ARROW: exp.Distance, 559 TokenType.SLASH: exp.Div, 560 TokenType.STAR: exp.Mul, 561 } 562 563 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 564 565 TIMES = { 566 TokenType.TIME, 567 TokenType.TIMETZ, 568 } 569 570 TIMESTAMPS = { 571 TokenType.TIMESTAMP, 572 TokenType.TIMESTAMPTZ, 573 TokenType.TIMESTAMPLTZ, 574 *TIMES, 575 } 576 577 SET_OPERATIONS = { 578 TokenType.UNION, 579 TokenType.INTERSECT, 580 TokenType.EXCEPT, 581 } 582 583 JOIN_METHODS = { 584 TokenType.ASOF, 585 TokenType.NATURAL, 586 TokenType.POSITIONAL, 587 } 588 589 JOIN_SIDES = { 590 TokenType.LEFT, 591 TokenType.RIGHT, 592 TokenType.FULL, 593 } 594 595 JOIN_KINDS = { 596 TokenType.ANTI, 597 TokenType.CROSS, 598 TokenType.INNER, 599 TokenType.OUTER, 600 TokenType.SEMI, 601 TokenType.STRAIGHT_JOIN, 602 } 603 604 JOIN_HINTS: t.Set[str] = set() 605 606 LAMBDAS = { 607 TokenType.ARROW: lambda self, expressions: self.expression( 608 exp.Lambda, 609 this=self._replace_lambda( 610 self._parse_assignment(), 611 expressions, 612 ), 613 expressions=expressions, 614 ), 615 TokenType.FARROW: lambda self, expressions: self.expression( 616 exp.Kwarg, 617 this=exp.var(expressions[0].name), 618 expression=self._parse_assignment(), 619 ), 620 } 621 622 COLUMN_OPERATORS = { 623 TokenType.DOT: None, 624 TokenType.DCOLON: lambda self, this, to: self.expression( 625 exp.Cast if self.STRICT_CAST else exp.TryCast, 626 this=this, 627 to=to, 628 ), 629 TokenType.ARROW: lambda self, this, path: self.expression( 630 
exp.JSONExtract, 631 this=this, 632 expression=self.dialect.to_json_path(path), 633 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 634 ), 635 TokenType.DARROW: lambda self, this, path: self.expression( 636 exp.JSONExtractScalar, 637 this=this, 638 expression=self.dialect.to_json_path(path), 639 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 640 ), 641 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 642 exp.JSONBExtract, 643 this=this, 644 expression=path, 645 ), 646 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 647 exp.JSONBExtractScalar, 648 this=this, 649 expression=path, 650 ), 651 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 652 exp.JSONBContains, 653 this=this, 654 expression=key, 655 ), 656 } 657 658 EXPRESSION_PARSERS = { 659 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 660 exp.Column: lambda self: self._parse_column(), 661 exp.Condition: lambda self: self._parse_assignment(), 662 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 663 exp.Expression: lambda self: self._parse_expression(), 664 exp.From: lambda self: self._parse_from(joins=True), 665 exp.Group: lambda self: self._parse_group(), 666 exp.Having: lambda self: self._parse_having(), 667 exp.Identifier: lambda self: self._parse_id_var(), 668 exp.Join: lambda self: self._parse_join(), 669 exp.Lambda: lambda self: self._parse_lambda(), 670 exp.Lateral: lambda self: self._parse_lateral(), 671 exp.Limit: lambda self: self._parse_limit(), 672 exp.Offset: lambda self: self._parse_offset(), 673 exp.Order: lambda self: self._parse_order(), 674 exp.Ordered: lambda self: self._parse_ordered(), 675 exp.Properties: lambda self: self._parse_properties(), 676 exp.Qualify: lambda self: self._parse_qualify(), 677 exp.Returning: lambda self: self._parse_returning(), 678 exp.Select: lambda self: self._parse_select(), 679 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 680 
exp.Table: lambda self: self._parse_table_parts(), 681 exp.TableAlias: lambda self: self._parse_table_alias(), 682 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 683 exp.Where: lambda self: self._parse_where(), 684 exp.Window: lambda self: self._parse_named_window(), 685 exp.With: lambda self: self._parse_with(), 686 "JOIN_TYPE": lambda self: self._parse_join_parts(), 687 } 688 689 STATEMENT_PARSERS = { 690 TokenType.ALTER: lambda self: self._parse_alter(), 691 TokenType.BEGIN: lambda self: self._parse_transaction(), 692 TokenType.CACHE: lambda self: self._parse_cache(), 693 TokenType.COMMENT: lambda self: self._parse_comment(), 694 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 695 TokenType.COPY: lambda self: self._parse_copy(), 696 TokenType.CREATE: lambda self: self._parse_create(), 697 TokenType.DELETE: lambda self: self._parse_delete(), 698 TokenType.DESC: lambda self: self._parse_describe(), 699 TokenType.DESCRIBE: lambda self: self._parse_describe(), 700 TokenType.DROP: lambda self: self._parse_drop(), 701 TokenType.INSERT: lambda self: self._parse_insert(), 702 TokenType.KILL: lambda self: self._parse_kill(), 703 TokenType.LOAD: lambda self: self._parse_load(), 704 TokenType.MERGE: lambda self: self._parse_merge(), 705 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 706 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 707 TokenType.REFRESH: lambda self: self._parse_refresh(), 708 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 709 TokenType.SET: lambda self: self._parse_set(), 710 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 711 TokenType.UNCACHE: lambda self: self._parse_uncache(), 712 TokenType.UPDATE: lambda self: self._parse_update(), 713 TokenType.USE: lambda self: self.expression( 714 exp.Use, 715 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 716 this=self._parse_table(schema=False), 717 ), 718 
TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon), 719 } 720 721 UNARY_PARSERS = { 722 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 723 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 724 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 725 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 726 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 727 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 728 } 729 730 STRING_PARSERS = { 731 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 732 exp.RawString, this=token.text 733 ), 734 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 735 exp.National, this=token.text 736 ), 737 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 738 TokenType.STRING: lambda self, token: self.expression( 739 exp.Literal, this=token.text, is_string=True 740 ), 741 TokenType.UNICODE_STRING: lambda self, token: self.expression( 742 exp.UnicodeString, 743 this=token.text, 744 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 745 ), 746 } 747 748 NUMERIC_PARSERS = { 749 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 750 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 751 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 752 TokenType.NUMBER: lambda self, token: self.expression( 753 exp.Literal, this=token.text, is_string=False 754 ), 755 } 756 757 PRIMARY_PARSERS = { 758 **STRING_PARSERS, 759 **NUMERIC_PARSERS, 760 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 761 TokenType.NULL: lambda self, _: self.expression(exp.Null), 762 TokenType.TRUE: lambda self, _: 
self.expression(exp.Boolean, this=True), 763 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 764 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 765 TokenType.STAR: lambda self, _: self.expression( 766 exp.Star, 767 **{ 768 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 769 "replace": self._parse_star_op("REPLACE"), 770 "rename": self._parse_star_op("RENAME"), 771 }, 772 ), 773 } 774 775 PLACEHOLDER_PARSERS = { 776 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 777 TokenType.PARAMETER: lambda self: self._parse_parameter(), 778 TokenType.COLON: lambda self: ( 779 self.expression(exp.Placeholder, this=self._prev.text) 780 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 781 else None 782 ), 783 } 784 785 RANGE_PARSERS = { 786 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 787 TokenType.GLOB: binary_range_parser(exp.Glob), 788 TokenType.ILIKE: binary_range_parser(exp.ILike), 789 TokenType.IN: lambda self, this: self._parse_in(this), 790 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 791 TokenType.IS: lambda self, this: self._parse_is(this), 792 TokenType.LIKE: binary_range_parser(exp.Like), 793 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 794 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 795 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 796 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 797 } 798 799 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 800 "ALLOWED_VALUES": lambda self: self.expression( 801 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 802 ), 803 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 804 "AUTO": lambda self: self._parse_auto_property(), 805 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 806 "BACKUP": lambda self: self.expression( 807 exp.BackupProperty, 
this=self._parse_var(any_token=True) 808 ), 809 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 810 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 811 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 812 "CHECKSUM": lambda self: self._parse_checksum(), 813 "CLUSTER BY": lambda self: self._parse_cluster(), 814 "CLUSTERED": lambda self: self._parse_clustered_by(), 815 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 816 exp.CollateProperty, **kwargs 817 ), 818 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 819 "CONTAINS": lambda self: self._parse_contains_property(), 820 "COPY": lambda self: self._parse_copy_property(), 821 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 822 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 823 "DEFINER": lambda self: self._parse_definer(), 824 "DETERMINISTIC": lambda self: self.expression( 825 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 826 ), 827 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 828 "DISTKEY": lambda self: self._parse_distkey(), 829 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 830 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 831 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 832 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 833 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 834 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 835 "FREESPACE": lambda self: self._parse_freespace(), 836 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 837 "HEAP": lambda self: self.expression(exp.HeapProperty), 838 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 839 "IMMUTABLE": lambda self: self.expression( 840 exp.StabilityProperty, 
this=exp.Literal.string("IMMUTABLE") 841 ), 842 "INHERITS": lambda self: self.expression( 843 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 844 ), 845 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 846 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 847 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 848 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 849 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 850 "LIKE": lambda self: self._parse_create_like(), 851 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 852 "LOCK": lambda self: self._parse_locking(), 853 "LOCKING": lambda self: self._parse_locking(), 854 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 855 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 856 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 857 "MODIFIES": lambda self: self._parse_modifies_property(), 858 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 859 "NO": lambda self: self._parse_no_property(), 860 "ON": lambda self: self._parse_on_property(), 861 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 862 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 863 "PARTITION": lambda self: self._parse_partitioned_of(), 864 "PARTITION BY": lambda self: self._parse_partitioned_by(), 865 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 866 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 867 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 868 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 869 "READS": lambda self: self._parse_reads_property(), 870 "REMOTE": lambda self: self._parse_remote_with_connection(), 871 "RETURNS": lambda self: self._parse_returns(), 872 "STRICT": 
lambda self: self.expression(exp.StrictProperty), 873 "ROW": lambda self: self._parse_row(), 874 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 875 "SAMPLE": lambda self: self.expression( 876 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 877 ), 878 "SECURE": lambda self: self.expression(exp.SecureProperty), 879 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 880 "SETTINGS": lambda self: self.expression( 881 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 882 ), 883 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 884 "SORTKEY": lambda self: self._parse_sortkey(), 885 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 886 "STABLE": lambda self: self.expression( 887 exp.StabilityProperty, this=exp.Literal.string("STABLE") 888 ), 889 "STORED": lambda self: self._parse_stored(), 890 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 891 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 892 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 893 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 894 "TO": lambda self: self._parse_to_table(), 895 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 896 "TRANSFORM": lambda self: self.expression( 897 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 898 ), 899 "TTL": lambda self: self._parse_ttl(), 900 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 901 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 902 "VOLATILE": lambda self: self._parse_volatile_property(), 903 "WITH": lambda self: self._parse_with_property(), 904 } 905 906 CONSTRAINT_PARSERS = { 907 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 908 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 909 "CASESPECIFIC": lambda self: 
self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var(any_token=True)
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        # ON UPDATE <function> becomes a column constraint; a bare ON <id> becomes a property.
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

    # ALTER TABLE <action> dispatchers, keyed by the action keyword.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
    }

    # ALTER TABLE ... ALTER <keyword> dispatchers (dist/sort key style alterations).
    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    # Constraint keywords that may appear in a schema definition without a name.
    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
    }

    # Functions whose argument(s) are parsed without surrounding parentheses.
    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    # Token types that cannot be used as a function name.
    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    # Functions whose arguments need bespoke (non comma-separated) parsing.
    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    # Query-modifier clause parsers; each returns a (modifier key, parsed node) pair.
    # Note that FETCH maps onto the same "limit" key as LIMIT, and several tokens
    # (FOR/LOCK, TABLE_SAMPLE/USING) share a target key as well.
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    # SET <scope> dispatchers; TRANSACTION gets its own parser.
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}

    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            # NOTE(review): "UNCOMITTED" looks like a typo for the SQL-standard
            # spelling "UNCOMMITTED" -- confirm against the tokenizer/generator
            # before changing, since fixing it alters what parses.
            ("LEVEL", "READ", "UNCOMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    # ON CONFLICT / OR <action> keywords; only DO takes follow-up keywords.
    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    # CREATE SEQUENCE option keywords and their allowed follow-up keywords.
    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple())

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    # Tokens that can introduce an ALTER TABLE ADD CONSTRAINT clause.
    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        """
        Args:
            error_level: How parse errors are surfaced (defaults to IMMEDIATE, i.e. raise).
            error_message_context: Number of characters of SQL shown around an error.
            max_errors: Maximum number of error messages concatenated when raising.
            dialect: The dialect (name, class or instance) this parser targets.
        """
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        """Clear all per-parse state so the instance can parse another token stream."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """Split `raw_tokens` into statements on semicolons and apply `parse_method` to each."""
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # Keep a semicolon that carries comments as its own chunk so the
                # comments aren't lost; the delimiter itself is otherwise dropped.
                if token.comments:
                    chunks.append([token])

                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Any tokens left over after a successful parse are unexpected input.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # Fall back to the pending token comments when none were passed explicitly.
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        """Attach (and consume) the previous token's comments to `expression`, if any."""
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        """Return the slice of the original SQL spanned by the `start` and `end` tokens."""
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        # NOTE(review): returns a falsy non-bool (None) when _prev/_curr are unset;
        # callers appear to rely on truthiness only. Wrapping in bool() would match
        # the annotation -- confirm before changing.
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        """Move the cursor forward `times` tokens, refreshing _curr/_next/_prev views."""
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        """Move the cursor back (or forward) to absolute token position `index`."""
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        """Log a warning that the current chunk is being parsed as an opaque Command."""
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        """Consume the rest of the statement as an opaque string under an exp.Command node."""
        self._warn_unsupported()
        return self.expression(
            exp.Command, this=self._prev.text.upper(), expression=self._parse_string()
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parse a COMMENT ON <kind> <target> IS <string> statement."""
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            # Unknown target kind: fall back to an opaque Command.
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        """Parse a (possibly schema-qualified) table name into a TO TABLE property."""
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parse a ClickHouse MergeTree TTL clause, including per-entry actions."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # Each TTL entry is an expression optionally followed by an action keyword.
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Parse one statement: dispatch on the leading token, else parse as an expression/select."""
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        """Parse DROP <kind> [IF EXISTS] ... with its trailing modifiers."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=kind.upper(),
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        """Match IF [NOT] EXISTS; returns a truthy value only when the full sequence matched."""
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        """Parse a CREATE [OR REPLACE] <kind> ... statement into exp.Create (or Command fallback)."""
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )

        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Properties can appear at several positions; merge them all into one node.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_statement()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

            shallow = self._match_text_seq("SHALLOW")

            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self.expression(
                    exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
                )

        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            # Trailing unconsumed tokens: bail out to an opaque Command.
            return self._parse_as_command(start)

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text.upper(),
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        """Parse CREATE SEQUENCE options; returns None when no option was consumed."""
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            self._match(TokenType.COMMA)
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                # Keyword-only options (see CREATE_SEQUENCE) are collected verbatim.
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        return None if self._index == index else seq

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only pass the modifier flags that were actually matched.
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
        """Parse a parenthesized, comma-separated property list."""
        return self._parse_wrapped_csv(self._parse_property)

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parse a single property, trying keyword parsers before key=value form."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            # Not a key=value property; backtrack and try sequence options instead.
            self._retreat(index)
            return self._parse_sequence_properties()

        # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise
        if isinstance(key, exp.Column):
            key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name)

        value = self._parse_bitwise() or self._parse_var(any_token=True)

        # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier())
        if isinstance(value, exp.Column):
            value = exp.var(value.name)

        return self.expression(exp.Property, this=key, value=value)

    def _parse_stored(self) -> exp.FileFormatProperty:
        """Parse STORED AS [INPUTFORMAT ... OUTPUTFORMAT ...] | <format>."""
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat, input_format=input_format, output_format=output_format
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
        )

    def _parse_unquoted_field(self) -> t.Optional[exp.Expression]:
        """Parse a field, demoting an unquoted Identifier to a Var."""
        field = self._parse_field()
        if isinstance(field, exp.Identifier) and not field.quoted:
            field = exp.var(field)

        return field

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        """Parse `[=|AS] <value>` into an instance of `exp_class`."""
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)

        return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """Greedily parse consecutive properties; returns None if none were found."""
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()
            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        """Parse [NO] FALLBACK [PROTECTION] (Teradata)."""
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        """Disambiguate VOLATILE: table property right after CREATE/REPLACE/UNIQUE vs. UDF stability."""
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_retention_period(self) -> exp.Var:
        # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        unit = self._parse_var(any_token=True)
        return exp.var(f"{number_str}{unit}")

    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        """Parse T-SQL SYSTEM_VERSIONING = ON/OFF with its optional parenthesized options."""
        self._match(TokenType.EQ)
        prop = self.expression(
            exp.WithSystemVersioningProperty,
            **{  # type: ignore
                "on": True,
                "with": with_,
            },
        )

        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
        self._match(TokenType.EQ)
        on = 
    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        """Parse the clause following a WITH keyword in a property list.

        Dispatches to the appropriate property parser based on the next
        token(s). NOTE: each branch's _match* probe consumes tokens, so the
        probe order below is significant.
        """
        # TSQL: WITH (SYSTEM_VERSIONING = ON (...))
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        # Generic parenthesized k = v property list
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if self._match(TokenType.SCHEMA):
            return self.expression(
                exp.WithSchemaBindingProperty,
                this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS),
            )

        # Nothing left to look at -- avoid the isolated-loading fallback
        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        """Parse MySQL's DEFINER = user@host clause; None if user/host missing."""
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        # host may be an identifier or a bare `%` wildcard token
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")
self._prev.text) 2043 2044 if not user or not host: 2045 return None 2046 2047 return exp.DefinerProperty(this=f"{user}@{host}") 2048 2049 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2050 self._match(TokenType.TABLE) 2051 self._match(TokenType.EQ) 2052 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2053 2054 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2055 return self.expression(exp.LogProperty, no=no) 2056 2057 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2058 return self.expression(exp.JournalProperty, **kwargs) 2059 2060 def _parse_checksum(self) -> exp.ChecksumProperty: 2061 self._match(TokenType.EQ) 2062 2063 on = None 2064 if self._match(TokenType.ON): 2065 on = True 2066 elif self._match_text_seq("OFF"): 2067 on = False 2068 2069 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2070 2071 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2072 return self.expression( 2073 exp.Cluster, 2074 expressions=( 2075 self._parse_wrapped_csv(self._parse_ordered) 2076 if wrapped 2077 else self._parse_csv(self._parse_ordered) 2078 ), 2079 ) 2080 2081 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2082 self._match_text_seq("BY") 2083 2084 self._match_l_paren() 2085 expressions = self._parse_csv(self._parse_column) 2086 self._match_r_paren() 2087 2088 if self._match_text_seq("SORTED", "BY"): 2089 self._match_l_paren() 2090 sorted_by = self._parse_csv(self._parse_ordered) 2091 self._match_r_paren() 2092 else: 2093 sorted_by = None 2094 2095 self._match(TokenType.INTO) 2096 buckets = self._parse_number() 2097 self._match_text_seq("BUCKETS") 2098 2099 return self.expression( 2100 exp.ClusteredByProperty, 2101 expressions=expressions, 2102 sorted_by=sorted_by, 2103 buckets=buckets, 2104 ) 2105 2106 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2107 if not self._match_text_seq("GRANTS"): 2108 
self._retreat(self._index - 1) 2109 return None 2110 2111 return self.expression(exp.CopyGrantsProperty) 2112 2113 def _parse_freespace(self) -> exp.FreespaceProperty: 2114 self._match(TokenType.EQ) 2115 return self.expression( 2116 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2117 ) 2118 2119 def _parse_mergeblockratio( 2120 self, no: bool = False, default: bool = False 2121 ) -> exp.MergeBlockRatioProperty: 2122 if self._match(TokenType.EQ): 2123 return self.expression( 2124 exp.MergeBlockRatioProperty, 2125 this=self._parse_number(), 2126 percent=self._match(TokenType.PERCENT), 2127 ) 2128 2129 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2130 2131 def _parse_datablocksize( 2132 self, 2133 default: t.Optional[bool] = None, 2134 minimum: t.Optional[bool] = None, 2135 maximum: t.Optional[bool] = None, 2136 ) -> exp.DataBlocksizeProperty: 2137 self._match(TokenType.EQ) 2138 size = self._parse_number() 2139 2140 units = None 2141 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2142 units = self._prev.text 2143 2144 return self.expression( 2145 exp.DataBlocksizeProperty, 2146 size=size, 2147 units=units, 2148 default=default, 2149 minimum=minimum, 2150 maximum=maximum, 2151 ) 2152 2153 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2154 self._match(TokenType.EQ) 2155 always = self._match_text_seq("ALWAYS") 2156 manual = self._match_text_seq("MANUAL") 2157 never = self._match_text_seq("NEVER") 2158 default = self._match_text_seq("DEFAULT") 2159 2160 autotemp = None 2161 if self._match_text_seq("AUTOTEMP"): 2162 autotemp = self._parse_schema() 2163 2164 return self.expression( 2165 exp.BlockCompressionProperty, 2166 always=always, 2167 manual=manual, 2168 never=never, 2169 default=default, 2170 autotemp=autotemp, 2171 ) 2172 2173 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2174 index = self._index 2175 no = 
self._match_text_seq("NO") 2176 concurrent = self._match_text_seq("CONCURRENT") 2177 2178 if not self._match_text_seq("ISOLATED", "LOADING"): 2179 self._retreat(index) 2180 return None 2181 2182 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2183 return self.expression( 2184 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2185 ) 2186 2187 def _parse_locking(self) -> exp.LockingProperty: 2188 if self._match(TokenType.TABLE): 2189 kind = "TABLE" 2190 elif self._match(TokenType.VIEW): 2191 kind = "VIEW" 2192 elif self._match(TokenType.ROW): 2193 kind = "ROW" 2194 elif self._match_text_seq("DATABASE"): 2195 kind = "DATABASE" 2196 else: 2197 kind = None 2198 2199 if kind in ("DATABASE", "TABLE", "VIEW"): 2200 this = self._parse_table_parts() 2201 else: 2202 this = None 2203 2204 if self._match(TokenType.FOR): 2205 for_or_in = "FOR" 2206 elif self._match(TokenType.IN): 2207 for_or_in = "IN" 2208 else: 2209 for_or_in = None 2210 2211 if self._match_text_seq("ACCESS"): 2212 lock_type = "ACCESS" 2213 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2214 lock_type = "EXCLUSIVE" 2215 elif self._match_text_seq("SHARE"): 2216 lock_type = "SHARE" 2217 elif self._match_text_seq("READ"): 2218 lock_type = "READ" 2219 elif self._match_text_seq("WRITE"): 2220 lock_type = "WRITE" 2221 elif self._match_text_seq("CHECKSUM"): 2222 lock_type = "CHECKSUM" 2223 else: 2224 lock_type = None 2225 2226 override = self._match_text_seq("OVERRIDE") 2227 2228 return self.expression( 2229 exp.LockingProperty, 2230 this=this, 2231 kind=kind, 2232 for_or_in=for_or_in, 2233 lock_type=lock_type, 2234 override=override, 2235 ) 2236 2237 def _parse_partition_by(self) -> t.List[exp.Expression]: 2238 if self._match(TokenType.PARTITION_BY): 2239 return self._parse_csv(self._parse_assignment) 2240 return [] 2241 2242 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2243 def _parse_partition_bound_expr() -> 
    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        """Parse Postgres' PARTITION OF <parent> {DEFAULT | FOR VALUES <spec>}.

        Returns None (after backing up one token) when OF doesn't follow,
        so the caller can try other PARTITION-prefixed constructs.
        """
        if not self._match_text_seq("OF"):
            # PARTITION was already consumed by the caller -- undo it
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            # raise_error raises for the default error level; under more
            # permissive levels `expression` may be left unbound below
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)
2298 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2299 ) 2300 2301 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2302 if self._match_text_seq("AND", "STATISTICS"): 2303 statistics = True 2304 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2305 statistics = False 2306 else: 2307 statistics = None 2308 2309 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2310 2311 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2312 if self._match_text_seq("SQL"): 2313 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2314 return None 2315 2316 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2317 if self._match_text_seq("SQL", "DATA"): 2318 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2319 return None 2320 2321 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2322 if self._match_text_seq("PRIMARY", "INDEX"): 2323 return exp.NoPrimaryIndexProperty() 2324 if self._match_text_seq("SQL"): 2325 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2326 return None 2327 2328 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2329 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2330 return exp.OnCommitProperty() 2331 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2332 return exp.OnCommitProperty(delete=True) 2333 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2334 2335 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2336 if self._match_text_seq("SQL", "DATA"): 2337 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2338 return None 2339 2340 def _parse_distkey(self) -> exp.DistKeyProperty: 2341 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2342 2343 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2344 table = 
    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a function's RETURNS clause.

        Handles three shapes: RETURNS TABLE<...> (BigQuery-style generic),
        RETURNS TABLE (...) (schema), and RETURNS <type>. Also recognizes
        `RETURNS NULL ON NULL INPUT`, recorded via the `null` flag.
        """
        value: t.Optional[exp.Expression]
        null = None
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # TABLE<col type, ...> -- struct-like column list in angle brackets
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                # TABLE (col type, ...) -- parenthesized schema
                value = self._parse_schema(exp.var("TABLE"))
        elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
            value = None
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)
    def _parse_insert(self) -> exp.Insert:
        """Parse an INSERT statement (the INSERT keyword was already consumed).

        Covers INSERT [OVERWRITE|IGNORE|OR <alt>] [INTO|TABLE|FUNCTION] plus
        Hive's INSERT ... DIRECTORY form. NOTE: the keyword-argument order in
        the final self.expression call is also the token-consumption order --
        do not reorder it.
        """
        comments = ensure_list(self._prev_comments)
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            # Hive: INSERT OVERWRITE [LOCAL] DIRECTORY '<path>' [ROW FORMAT ...]
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            # SQLite-style INSERT OR {REPLACE|IGNORE|...}
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )

        # RETURNING may appear before the source query in some dialects...
        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            # ... or after it, hence the second attempt here
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )
    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse Hive's ROW FORMAT {SERDE '<class>' | DELIMITED <options>}.

        When `match_row` is True, the leading ROW FORMAT tokens are required;
        otherwise the caller is assumed to have consumed them already.
        """
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = self._parse_serde_properties()

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        # Each DELIMITED sub-clause is optional, but they must appear in this
        # order; each sets the corresponding delimiter/escape string.
        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore
    def _parse_delete(self) -> exp.Delete:
        """Parse a DELETE statement (DELETE keyword already consumed).

        NOTE: the keyword-argument evaluation order in self.expression is the
        token-consumption order -- do not reorder it.
        """
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            # Tokens before FROM are the target-table list (MySQL multi-table)
            tables = self._parse_csv(self._parse_table) or None

        # RETURNING may appear before or after WHERE depending on dialect
        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )
this=self._parse_table(schema=True) 2636 ) 2637 2638 def _parse_cache(self) -> exp.Cache: 2639 lazy = self._match_text_seq("LAZY") 2640 self._match(TokenType.TABLE) 2641 table = self._parse_table(schema=True) 2642 2643 options = [] 2644 if self._match_text_seq("OPTIONS"): 2645 self._match_l_paren() 2646 k = self._parse_string() 2647 self._match(TokenType.EQ) 2648 v = self._parse_string() 2649 options = [k, v] 2650 self._match_r_paren() 2651 2652 self._match(TokenType.ALIAS) 2653 return self.expression( 2654 exp.Cache, 2655 this=table, 2656 lazy=lazy, 2657 options=options, 2658 expression=self._parse_select(nested=True), 2659 ) 2660 2661 def _parse_partition(self) -> t.Optional[exp.Partition]: 2662 if not self._match(TokenType.PARTITION): 2663 return None 2664 2665 return self.expression( 2666 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2667 ) 2668 2669 def _parse_value(self) -> t.Optional[exp.Tuple]: 2670 if self._match(TokenType.L_PAREN): 2671 expressions = self._parse_csv(self._parse_expression) 2672 self._match_r_paren() 2673 return self.expression(exp.Tuple, expressions=expressions) 2674 2675 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
    def _parse_projections(self) -> t.List[exp.Expression]:
        """Parse the SELECT projection list; a hook dialects can override."""
        return self._parse_expressions()
    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a WITH clause and its comma-separated CTE list.

        `skip_with_token` lets callers that already consumed WITH reuse this.
        Returns None when there is no WITH clause to parse.
        """
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # Loop continues on either a comma or a repeated WITH keyword;
            # after a comma, an optional extra WITH is also tolerated.
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )
self._match(TokenType.ALIAS) 2814 2815 if self._match_text_seq("NOT", "MATERIALIZED"): 2816 materialized = False 2817 elif self._match_text_seq("MATERIALIZED"): 2818 materialized = True 2819 else: 2820 materialized = None 2821 2822 return self.expression( 2823 exp.CTE, 2824 this=self._parse_wrapped(self._parse_statement), 2825 alias=alias, 2826 materialized=materialized, 2827 ) 2828 2829 def _parse_table_alias( 2830 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2831 ) -> t.Optional[exp.TableAlias]: 2832 any_token = self._match(TokenType.ALIAS) 2833 alias = ( 2834 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2835 or self._parse_string_as_identifier() 2836 ) 2837 2838 index = self._index 2839 if self._match(TokenType.L_PAREN): 2840 columns = self._parse_csv(self._parse_function_parameter) 2841 self._match_r_paren() if columns else self._retreat(index) 2842 else: 2843 columns = None 2844 2845 if not alias and not columns: 2846 return None 2847 2848 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 2849 2850 # We bubble up comments from the Identifier to the TableAlias 2851 if isinstance(alias, exp.Identifier): 2852 table_alias.add_comments(alias.pop_comments()) 2853 2854 return table_alias 2855 2856 def _parse_subquery( 2857 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2858 ) -> t.Optional[exp.Subquery]: 2859 if not this: 2860 return None 2861 2862 return self.expression( 2863 exp.Subquery, 2864 this=this, 2865 pivots=self._parse_pivots(), 2866 alias=self._parse_table_alias() if parse_alias else None, 2867 ) 2868 2869 def _implicit_unnests_to_explicit(self, this: E) -> E: 2870 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 2871 2872 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2873 for i, join in enumerate(this.args.get("joins") or []): 2874 table = join.this 2875 normalized_table = 
 table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach trailing query modifiers (joins, laterals, WHERE/GROUP/LIMIT/...) to `this`."""
        if isinstance(this, (exp.Query, exp.Table)):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # A LIMIT that carried an offset is split into a separate Offset node
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                # Any LIMIT ... BY expressions move onto the Offset node
                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        """Parse an optimizer hint block (/*+ ... */); None if HINT isn't next."""
        if self._match(TokenType.HINT):
            hints = []
            # Keep collecting comma-separated hint lists until an empty one is returned
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_function() or self._parse_var(upper=True)
                ),
                [],
            ):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        """Parse INTO [TEMPORARY|UNLOGGED] [TABLE] <table>; None if INTO isn't next."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        """Parse a FROM clause; None if FROM isn't next (unless skipped)."""
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        """Parse one MEASURES entry, with an optional FINAL/RUNNING window frame."""
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parse a MATCH_RECOGNIZE(...) clause; None if the keyword isn't next."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        # The ROWS PER MATCH variants are kept as verbatim text in a Var node
        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY",
"MATCHES"): 2997 text += " SHOW EMPTY MATCHES" 2998 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2999 text += " OMIT EMPTY MATCHES" 3000 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3001 text += " WITH UNMATCHED ROWS" 3002 rows = exp.var(text) 3003 else: 3004 rows = None 3005 3006 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3007 text = "AFTER MATCH SKIP" 3008 if self._match_text_seq("PAST", "LAST", "ROW"): 3009 text += " PAST LAST ROW" 3010 elif self._match_text_seq("TO", "NEXT", "ROW"): 3011 text += " TO NEXT ROW" 3012 elif self._match_text_seq("TO", "FIRST"): 3013 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3014 elif self._match_text_seq("TO", "LAST"): 3015 text += f" TO LAST {self._advance_any().text}" # type: ignore 3016 after = exp.var(text) 3017 else: 3018 after = None 3019 3020 if self._match_text_seq("PATTERN"): 3021 self._match_l_paren() 3022 3023 if not self._curr: 3024 self.raise_error("Expecting )", self._curr) 3025 3026 paren = 1 3027 start = self._curr 3028 3029 while self._curr and paren > 0: 3030 if self._curr.token_type == TokenType.L_PAREN: 3031 paren += 1 3032 if self._curr.token_type == TokenType.R_PAREN: 3033 paren -= 1 3034 3035 end = self._prev 3036 self._advance() 3037 3038 if paren > 0: 3039 self.raise_error("Expecting )", self._curr) 3040 3041 pattern = exp.var(self._find_sql(start, end)) 3042 else: 3043 pattern = None 3044 3045 define = ( 3046 self._parse_csv(self._parse_name_as_expression) 3047 if self._match_text_seq("DEFINE") 3048 else None 3049 ) 3050 3051 self._match_r_paren() 3052 3053 return self.expression( 3054 exp.MatchRecognize, 3055 partition_by=partition, 3056 order=order, 3057 measures=measures, 3058 rows=rows, 3059 after=after, 3060 pattern=pattern, 3061 define=define, 3062 alias=self._parse_table_alias(), 3063 ) 3064 3065 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3066 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3067 if not cross_apply and 
 self._match_pair(TokenType.OUTER, TokenType.APPLY):
            # OUTER APPLY is encoded as cross_apply=False (CROSS APPLY is truthy)
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: parse an unnest, function call or dotted identifier chain
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Return the (method, side, kind) tokens of a join prefix, each possibly None."""
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse one JOIN clause (comma joins and CROSS/OUTER APPLY included)."""
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind =
 self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)

        if not skip_join_token and not join:
            # Not actually a join — undo everything consumed so far
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not isinstance(kwargs["this"], exp.Unnest) and not (
            kind and kind.token_type == TokenType.CROSS
        ):
            # The ON/USING may follow a chain of nested joins; try to consume them first
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_assignment()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        """Parse an expression optionally followed by an operator class name."""
        this = self._parse_assignment()

        if
 self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        """Parse the trailing parameter clauses of an index definition
        (USING, columns, INCLUDE, PARTITION BY, WITH, TABLESPACE, WHERE, ON)."""
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        on = self._parse_field() if self._match(TokenType.ON) else None

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
            on=on,
        )

    def _parse_index(
        self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
    ) -> t.Optional[exp.Index]:
        """Parse an index definition, either with a pre-parsed/anonymous name or inline."""
        if index or anonymous:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse table hints: T-SQL WITH (...) or MySQL-style index hints."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_set((TokenType.INDEX, TokenType.KEY))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one component of a (possibly dotted) table reference."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        """Parse a dotted table reference: [catalog.][db.]table (nested dots allowed)."""
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for
 tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            # Fold a trailing, adjacent * into the identifier (wildcard table refs)
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            # Only a database is being referenced, so shift the parts left
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        return self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
            pivots=self._parse_pivots(),
        )

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse any table-like source: lateral, unnest, VALUES, subquery or plain table."""
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or self._parse_bracket(
 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        # Whether TABLESAMPLE is parsed before or after the alias is dialect-dependent
        if self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            # The sample becomes the parent node of the table it samples
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        """Parse temporal clauses: TIMESTAMP/VERSION AS OF, BETWEEN, CONTAINED IN, ALL."""
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind =
 self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        """Parse an UNNEST(...) expression, including aliases and WITH ORDINALITY/OFFSET."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                # In column-only dialects the alias names the produced column, not a table
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                # The last column alias names the ordinality/offset column
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        """Parse a VALUES (...) derived table, possibly wrapped in parentheses."""
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match_text_seq("VALUES"):
            return
None 3504 3505 expressions = self._parse_csv(self._parse_value) 3506 alias = self._parse_table_alias() 3507 3508 if is_derived: 3509 self._match_r_paren() 3510 3511 return self.expression( 3512 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3513 ) 3514 3515 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3516 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3517 as_modifier and self._match_text_seq("USING", "SAMPLE") 3518 ): 3519 return None 3520 3521 bucket_numerator = None 3522 bucket_denominator = None 3523 bucket_field = None 3524 percent = None 3525 size = None 3526 seed = None 3527 3528 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3529 matched_l_paren = self._match(TokenType.L_PAREN) 3530 3531 if self.TABLESAMPLE_CSV: 3532 num = None 3533 expressions = self._parse_csv(self._parse_primary) 3534 else: 3535 expressions = None 3536 num = ( 3537 self._parse_factor() 3538 if self._match(TokenType.NUMBER, advance=False) 3539 else self._parse_primary() or self._parse_placeholder() 3540 ) 3541 3542 if self._match_text_seq("BUCKET"): 3543 bucket_numerator = self._parse_number() 3544 self._match_text_seq("OUT", "OF") 3545 bucket_denominator = bucket_denominator = self._parse_number() 3546 self._match(TokenType.ON) 3547 bucket_field = self._parse_field() 3548 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3549 percent = num 3550 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3551 size = num 3552 else: 3553 percent = num 3554 3555 if matched_l_paren: 3556 self._match_r_paren() 3557 3558 if self._match(TokenType.L_PAREN): 3559 method = self._parse_var(upper=True) 3560 seed = self._match(TokenType.COMMA) and self._parse_number() 3561 self._match_r_paren() 3562 elif self._match_texts(("SEED", "REPEATABLE")): 3563 seed = self._parse_wrapped(self._parse_number) 3564 3565 if not method and self.DEFAULT_SAMPLING_METHOD: 3566 method = 
exp.var(self.DEFAULT_SAMPLING_METHOD) 3567 3568 return self.expression( 3569 exp.TableSample, 3570 expressions=expressions, 3571 method=method, 3572 bucket_numerator=bucket_numerator, 3573 bucket_denominator=bucket_denominator, 3574 bucket_field=bucket_field, 3575 percent=percent, 3576 size=size, 3577 seed=seed, 3578 ) 3579 3580 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3581 return list(iter(self._parse_pivot, None)) or None 3582 3583 def _parse_joins(self) -> t.Iterator[exp.Join]: 3584 return iter(self._parse_join, None) 3585 3586 # https://duckdb.org/docs/sql/statements/pivot 3587 def _parse_simplified_pivot(self) -> exp.Pivot: 3588 def _parse_on() -> t.Optional[exp.Expression]: 3589 this = self._parse_bitwise() 3590 return self._parse_in(this) if self._match(TokenType.IN) else this 3591 3592 this = self._parse_table() 3593 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3594 using = self._match(TokenType.USING) and self._parse_csv( 3595 lambda: self._parse_alias(self._parse_function()) 3596 ) 3597 group = self._parse_group() 3598 return self.expression( 3599 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3600 ) 3601 3602 def _parse_pivot_in(self) -> exp.In: 3603 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3604 this = self._parse_assignment() 3605 3606 self._match(TokenType.ALIAS) 3607 alias = self._parse_field() 3608 if alias: 3609 return self.expression(exp.PivotAlias, this=this, alias=alias) 3610 3611 return this 3612 3613 value = self._parse_column() 3614 3615 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3616 self.raise_error("Expecting IN (") 3617 3618 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3619 3620 self._match_r_paren() 3621 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3622 3623 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3624 index = self._index 3625 include_nulls = None 3626 3627 if 
 self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute the output column names of the pivot
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Default pivot column naming: use each aggregation's alias."""
        return [agg.alias for agg in aggregations]

    def
 _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        """Parse a PREWHERE clause."""
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse GROUP BY, accumulating expressions, GROUPING SETS, ROLLUP, CUBE and TOTALS."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        while True:
            expressions = self._parse_csv(
                lambda: None
                if self._match(TokenType.ROLLUP, advance=False)
                else self._parse_assignment()
            )
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                # WITH ROLLUP has no column list; plain ROLLUP takes a wrapped one
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    # The WITH belonged to something else — rewind before it
                    self._retreat(index)
 break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse GROUPING SETS (...)."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: a parenthesized tuple or a bare column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_assignment())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_assignment())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        """Parse a START WITH ... CONNECT BY [NOCYCLE] hierarchical-query clause."""
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_assignment()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        # PRIOR is only treated as a function while parsing the CONNECT BY condition
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_assignment()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        # START WITH may also follow the CONNECT BY condition
        if not start and self._match(TokenType.START_WITH):
            start = self._parse_assignment()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> exp.Alias:
        """Parse `<name> [AS] <expression>` into an Alias node."""
        return self.expression(
            exp.Alias,
            alias=self._parse_id_var(any_token=True),
            this=self._match(TokenType.ALIAS) and
 self._parse_assignment(),
        )

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse an INTERPOLATE (...) modifier of ORDER BY ... WITH FILL."""
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse ORDER BY (or ORDER SIBLINGS BY); returns `this` unchanged when absent."""
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            interpolate=self._parse_interpolate(),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Parse a generic sort clause introduced by `token` into `exp_class`."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        """Parse one ordering term with ASC/DESC, NULLS FIRST/LAST and WITH FILL."""
        this = parse_method() if parse_method else self._parse_assignment()
        if not this:
            return None

        asc = self._match(TokenType.ASC)
        # `(asc and False)` yields an explicit False (rather than a falsy non-bool)
        # when ASC was matched
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # Apply the dialect's default null ordering when none was given explicitly
        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from":
 self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT/TOP (including the `LIMIT offset, count` form) or FETCH."""
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                # TOP may wrap its count in parentheses
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                # `LIMIT offset, count`: the first term is actually the offset
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this
    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse an OFFSET clause (with optional ROW/ROWS noise word) into exp.Offset."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        # Optional trailing BY <exprs> (LIMIT ... BY); the short-circuit `and`
        # only parses the list when BY was consumed.
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse zero or more locking clauses: FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                # FOR UPDATE OF <table list>
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait is tri-state: True = NOWAIT, False = SKIP LOCKED,
            # an expression = WAIT <timeout>, None = unspecified.
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Fold trailing UNION/EXCEPT/INTERSECT branches onto `this`, left-associatively."""
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation: t.Type[exp.SetOperation] = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments
            # DISTINCT is implied unless ALL is spelled out; both matches consume tokens
            distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL)
            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP:
            expression = this.expression

            if expression:
                # Hoist trailing modifiers (those in SET_OP_MODIFIERS) from the
                # right-hand operand up to the set operation node itself.
                for arg in self.SET_OP_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        # A full scalar expression: assignment precedence plus an optional alias.
        return self._parse_alias(self._parse_assignment())

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        # Assignment operators are right-associative via the recursive call below.
        this = self._parse_disjunction()

        while self._match_set(self.ASSIGNMENT):
            this = self.expression(
                self.ASSIGNMENT[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_assignment(),
            )

        return this

    # Standard precedence ladder: OR -> AND -> equality -> comparison -> range
    def _parse_disjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION)

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse range-style predicates ([NOT] BETWEEN/IN/LIKE/... via RANGE_PARSERS)
        plus the ISNULL/NOTNULL shorthands and a trailing IS predicate."""
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            # The NOT consumed above applies to the whole range predicate
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this
    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of IS [NOT] {DISTINCT FROM <expr> | NULL | TRUE | FALSE}.

        Returns None (after rewinding) when what follows IS isn't one of these,
        so the caller can treat IS differently.
        """
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            # IS NOT DISTINCT FROM == null-safe equality; IS DISTINCT FROM == null-safe inequality
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            # Not an IS predicate we handle — rewind past IS (and NOT, if any)
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the tail of an IN predicate: UNNEST(...), (list|subquery), [list], or a bare field."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                # IN (SELECT ...) — store as a subquery rather than a value list
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            # e.g. IN <field> with no parentheses
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        # Tail of BETWEEN <low> AND <high>; the BETWEEN token was already consumed.
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Optional ESCAPE '<char>' trailer (e.g. after LIKE)
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        """Parse an INTERVAL expression, canonicalizing to INTERVAL '<value>' <unit>.

        With match_interval=False the INTERVAL keyword is optional (used for the
        implicit continuation in sums of intervals below).
        """
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            # Bail out (e.g. `interval IS ...` where "interval" was a column name)
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.to_py())
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if len(parts) == 1:
                if unit:
                    # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var, this=parts[0][1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            # e.g. INTERVAL '1-2' YEAR TO MONTH
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        interval = self.expression(exp.Interval, this=this, unit=unit)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval
    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise-precedence operators, plus ||, ??, and << / >> spelled as LT LT / GT GT."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                # || as string concatenation (dialect-dependent)
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                # ?? coalescing operator
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        # Additive-precedence operators (TERM set)
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-precedence operators (FACTOR set), tagging division nodes."""
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                # A word-spelled DIV with no right operand wasn't an operator after all
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Div):
                # Record the dialect's division semantics on the node for transpilation
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an interval, a typed literal (e.g. DATE '2020-01-01'), or fall back to a column."""
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                # <TYPE> '<literal>' — either a registered literal parser or a cast
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0)) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

            self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)
    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        """Parse one data-type parameter (e.g. the 38 in DECIMAL(38, 0)) into exp.DataTypeParam."""
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            # A bare identifier used as a type parameter becomes an uppercased var
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a data type, including nested/struct/enum/aggregate parameters, time-zone
        qualifiers, INTERVAL units, UNSIGNED variants, and trailing LIST / ARRAY suffixes.

        Returns None (after rewinding the token stream) when no type can be parsed.
        """
        index = self._index

        this: t.Optional[exp.Expression] = None
        # Teradata-style SYSUDTLIB. prefix before a type name
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            # Possibly a user-defined or identifier-spelled type
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier, exp.Identifier):
                # Re-tokenize the identifier to see whether it's actually a type keyword
                tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect))

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    this = exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        # https://materialize.com/docs/sql/types/map/
        if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
            key_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.FARROW):
                self._retreat(index)
                return None

            value_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.R_BRACKET):
                self._retreat(index)
                return None

            return exp.DataType(
                this=exp.DataType.Type.MAP,
                expressions=[key_type, value_type],
                nested=True,
                prefix=prefix,
            )

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                # e.g. AggregateFunction(<func>, <type>, ...) — first arg is a function/identifier
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR,)
                )
                if not func_or_ident or not self._match(TokenType.COMMA):
                    return None
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                expressions.insert(0, func_or_ident)
            else:
                expressions = self._parse_csv(self._parse_type_size)

                # https://docs.snowflake.com/en/sql-reference/data-types-vector
                if type_token == TokenType.VECTOR and len(expressions) == 2:
                    expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect)

            if not expressions or not self._match(TokenType.R_PAREN):
                # Not a valid parameter list; rewind — it may have been a function call
                self._retreat(index)
                return None

            maybe_func = True

        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            # Angle-bracket parameters: ARRAY<...>, STRUCT<...>, etc.
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                # Constructor-style value list, e.g. ARRAY<INT>[1, 2]
                values = self._parse_csv(self._parse_assignment)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)

        if maybe_func and check_func:
            # TYPE(...) not followed by a string literal is treated as a function
            # call rather than a type, so rewind entirely.
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )
        elif expressions:
            this.set("expressions", expressions)

        # https://materialize.com/docs/sql/types/list/#type-name
        while self._match(TokenType.LIST):
            this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True)

        index = self._index

        # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3]
        matched_array = self._match(TokenType.ARRAY)

        while self._curr:
            matched_l_bracket = self._match(TokenType.L_BRACKET)
            if not matched_l_bracket and not matched_array:
                break

            matched_array = False
            values = self._parse_csv(self._parse_assignment) or None
            if values and not schema:
                # Outside of a schema context a bracketed value list means subscripting,
                # not an array type — rewind and stop.
                self._retreat(index)
                break

            this = exp.DataType(
                this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True
            )
            self._match(TokenType.R_BRACKET)

        if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type):
            # Dialect-specific canonicalization (e.g. filling in default precision)
            converter = self.TYPE_CONVERTERS.get(this.this)
            if converter:
                this = converter(t.cast(exp.DataType, this))

        return this

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        """Parse one STRUCT field: <name> [:] <type> [constraints], or a bare type."""
        index = self._index

        if (
            self._curr
            and self._next
            and self._curr.token_type in self.TYPE_TOKENS
            and self._next.token_type in self.TYPE_TOKENS
        ):
            # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a
            # type token. Without this, the list will be parsed as a type and we'll eventually crash
            this = self._parse_id_var()
        else:
            this = (
                self._parse_type(parse_interval=False, fallback_to_identifier=True)
                or self._parse_id_var()
            )

        self._match(TokenType.COLON)

        if (
            type_required
            and not isinstance(this, exp.DataType)
            and not self._match_set(self.TYPE_TOKENS, advance=False)
        ):
            # We parsed a name but no type follows — re-parse the whole thing as a type
            self._retreat(index)
            return self._parse_types()

        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Optional AT TIME ZONE <expr> trailer
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted / bracketed) column expression."""
        this = self._parse_column_reference()
        column = self._parse_column_ops(this) if this else self._parse_bracket(this)

        if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column:
            # Oracle-style (+) outer-join marker
            column.set("join_mark", self._match(TokenType.JOIN_MARKER))

        return column

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            # VALUES used as a plain identifier (not the VALUES (...) construct)
            this = self._parse_id_var()

        if isinstance(this, exp.Identifier):
            # We bubble up comments from the Identifier to the Column
            this = self.expression(exp.Column, comments=this.pop_comments(), this=this)

        return this
    def _parse_colon_as_variant_extract(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Parse Snowflake/Databricks `col:path.to.field[::type]` VARIANT extraction
        into an exp.JSONExtract (with any casts re-applied on top)."""
        casts = []
        json_path = []

        while self._match(TokenType.COLON):
            start_index = self._index

            # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True
            path = self._parse_column_ops(
                self._parse_field(any_token=True, tokens=(TokenType.SELECT,))
            )

            # The cast :: operator has a lower precedence than the extraction operator :, so
            # we rearrange the AST appropriately to avoid casting the JSON path
            while isinstance(path, exp.Cast):
                casts.append(path.to)
                path = path.this

            if casts:
                # The path segment ends right before the first :: token
                dcolon_offset = next(
                    i
                    for i, t in enumerate(self._tokens[start_index:])
                    if t.token_type == TokenType.DCOLON
                )
                end_token = self._tokens[start_index + dcolon_offset - 1]
            else:
                end_token = self._prev

            if path:
                # Recover the raw SQL text of the path segment from the token stream
                json_path.append(self._find_sql(self._tokens[start_index], end_token))

        # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while
        # Databricks transforms it back to the colon/dot notation
        if json_path:
            this = self.expression(
                exp.JSONExtract,
                this=this,
                expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))),
                variant_extract=True,
            )

            while casts:
                this = self.expression(exp.Cast, this=this, to=casts.pop())

        return this

    def _parse_dcolon(self) -> t.Optional[exp.Expression]:
        # The right-hand side of a :: cast is a type
        return self._parse_types()

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Apply trailing column operators (dots, :: casts, brackets, etc.) to `this`."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_dcolon()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference()
            else:
                field = self._parse_field(any_token=True, anonymous_func=True)

            if isinstance(field, exp.Func) and this:
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = exp.replace_tree(
                    this,
                    lambda n: (
                        self.expression(exp.Dot, this=n.args.get("table"), expression=n.this)
                        if n.table
                        else n.this
                    )
                    if isinstance(n, exp.Column)
                    else n,
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # a.b.c — shift the existing column parts up one level
                # (column -> table, table -> db, db -> catalog)
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            this = self._parse_bracket(this)

        return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: literals, .N numbers, and parenthesized
        expressions / tuples / subqueries."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate: 'a' 'b' -> CONCAT('a', 'b')
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # Leading-dot decimal, e.g. .5 -> 0.5
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if not this and self._match(TokenType.R_PAREN, advance=False):
                # Empty parens -> empty tuple
                this = self.expression(exp.Tuple)
            elif isinstance(this, exp.UNWRAPPED_QUERIES):
                this = self._parse_subquery(this=this, parse_alias=False)
            elif isinstance(this, exp.Subquery):
                # Allow set operations on the parenthesized subquery
                this = self._parse_subquery(
                    this=self._parse_set_operations(this), parse_alias=False
                )
            elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        # Try primary/function in an order that depends on whether anonymous
        # functions should win over literals, then fall back to an identifier.
        if anonymous_func:
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
                anonymous=anonymous_func, any_token=any_token
            )
        return field or self._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func
    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a function invocation.

        Resolution order: no-paren function parsers, no-paren builtins, custom
        FUNCTION_PARSERS, subquery predicates (e.g. EXISTS(SELECT ...)), known
        FUNCTIONS builders, and finally exp.Anonymous as the catch-all.
        """
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # No parenthesis follows — only a no-paren builtin can match here
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening parenthesis
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # e.g. EXISTS (SELECT ...) / ANY (WITH ... SELECT ...)
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if alias:
                # Normalize aliased/assigned args into PropertyEQ nodes
                args = self._kv_to_prop_eq(args)

            if function and not anonymous:
                # Some builder callables also take the dialect — detect via signature
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    # Remember the original spelling of the function name
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True)
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
        """Normalize key/value-style arguments (aliases, assignments) into exp.PropertyEQ."""
        transformed = []

        for e in expressions:
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression
                    )

                if isinstance(e.this, exp.Column):
                    # The key is an identifier, not a column reference
                    e.this.replace(e.this.this)

            transformed.append(e)

        return transformed

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted) UDF name with an optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        # e.g. MySQL charset introducer _utf8'...'; falls back to a plain identifier
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        # Parses [<kind>.]<name>, e.g. @@session.sql_mode
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var()

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda ((x, y) -> expr or x -> expr); otherwise fall back to
        DISTINCT <exprs> or a regular select/expression argument."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
            )

            if not self._match(TokenType.R_PAREN):
                # Not a lambda arg list after all
                self._retreat(index)
        else:
            expressions = [self._parse_lambda_arg()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # No lambda arrow found — rewind and parse a normal argument
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_assignment)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        # Allow aggregate-style trailers on the argument (IGNORE NULLS, HAVING MAX,
        # ORDER BY, LIMIT)
        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )
self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this
        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        # Computed column, e.g. <name> [type] ALIAS|MATERIALIZED <expr> [PERSISTED] [NOT NULL]
        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_assignment(),
                    persisted=persisted or self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(exp.TransformColumnConstraint, this=self._parse_field())
            )

        # Collect any remaining column constraints
        while True:
            constraint = 
self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        # AUTO_INCREMENT, optionally with (start, increment) or START ... INCREMENT ...
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        # AUTO REFRESH <var>; rewinds if REFRESH doesn't follow
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        # COMPRESS (<expr>, ...) or COMPRESS <expr>
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        # GENERATED BY DEFAULT [ON NULL] vs GENERATED ALWAYS
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        # GENERATED ALWAYS AS ROW START|END [HIDDEN]
        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (<expr>): a computed expression, not identity options
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                # IDENTITY(start, increment) shorthand
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        # Constraint kinds that can follow a NOT keyword
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        # Optional CONSTRAINT <name> prefix, then a dialect-specific constraint kind
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        # Accumulate constraints until neither a constraint nor a function parses
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
        )

    def 
_parse_key_constraint_options(self) -> t.List[str]:
        # Collects trailing key options (ON <event> actions, enforcement, deferrability, ...)
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        # ON DELETE / ON UPDATE referential actions
        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            **options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        # PERIOD FOR SYSTEM_TIME (<start_col>, <end_col>); rewinds if the token doesn't follow
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        # Without a parenthesized column list this is a column-level constraint
        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this:
            this = self.expression(exp.Array, expressions=expressions)
        else:
            constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
            if constructor_type:
                return self.expression(constructor_type, expressions=expressions)

            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        # Recurse to support chained subscripts, e.g. x[0][0]
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_assignment())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_assignment()

        while self._match(TokenType.WHEN):
            this = self._parse_assignment()
            self._match(TokenType.THEN)
            then = self._parse_assignment()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_assignment()

        if not self._match(TokenType.END):
            # The ELSE branch may have greedily consumed an END keyword as an
            # interval unit, e.g. ELSE interval END - undo that here
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = 
exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            # Function form: IF(cond, true[, false])
            args = self._parse_csv(self._parse_assignment)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                return self._parse_as_command(self._prev)

            condition = self._parse_assignment()

            if not condition:
                self._retreat(index)
                return None

            # Statement form: IF cond THEN true [ELSE false] END
            self._match(TokenType.THEN)
            true = self._parse_assignment()
            false = self._parse_assignment() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        # NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)]; rewinds if not matched
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract | exp.Anonymous:
        # EXTRACT(part FROM expr) or the comma-separated variant
        this = self._parse_function() or self._parse_var_or_string(upper=True)

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_gap_fill(self) -> exp.GapFill:
        self._match(TokenType.TABLE)
        this = self._parse_table()

        self._match(TokenType.COMMA)
        args = [this, *self._parse_csv(self._parse_lambda)]

        gap_fill = exp.GapFill.from_arg_list(args)
        return self.validate_expression(gap_fill, args)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        this = self._parse_assignment()

        if not self._match(TokenType.ALIAS):
            # CAST(expr, 'type string') form
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                # CAST ... FORMAT into a temporal type becomes StrToDate / StrToTime,
                # with the format translated via the dialect's time mappings
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                    safe=safe,
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # Unknown type name: treat it as a user-defined type
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
        )

    def _parse_string_agg(self) -> exp.Expression:
        if 
self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_assignment()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_assignment))
        else:
            args = self._parse_csv(self._parse_assignment)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            # CONVERT(expr USING charset)
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            # CONVERT(expr, type)
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_assignment)

        # Two args (or fewer): the DECODE(bin, charset) variant
        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                # NULL search values must compare with IS NULL
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search value: also treat NULL-to-NULL as a match
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        # An odd number of (search, result) arguments means the last one is the default
        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e. 
NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        # JSON_OBJECT(...) / JSON_OBJECTAGG(...): star or key-value pairs plus options
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        # WITH/WITHOUT UNIQUE [KEYS]
        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        # MATCH (col, ...) AGAINST ('expr' [modifier])
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # A single column in the WITH (...) clause: name, type, path, AS JSON
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        args = self._parse_csv(self._parse_bitwise)

        # POSITION(needle IN haystack) form
        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        # Comma form: dialects disagree on argument order
        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        # PREDICT(MODEL <table>, TABLE <table>[, <params struct>])
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
        if self._match(TokenType.FOR):
            # FOR without FROM: the start position defaults to 1
            if len(args) == 1:
args.append(exp.Literal.number(1))
            args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        # Optional LEADING | TRAILING | BOTH
        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(<chars> FROM <target>) lists the pattern first, so swap the operands
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # HAVING MAX|MIN <column>; anything other than MIN is treated as MAX
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        # FILTER (WHERE ...) clause preceding the window
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                # Re-root the IGNORE/RESPECT NULLS wrapper around the whole aggregate
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
5823 if alias: 5824 over = None 5825 self._match(TokenType.ALIAS) 5826 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5827 return this 5828 else: 5829 over = self._prev.text.upper() 5830 5831 if comments and isinstance(func, exp.Expression): 5832 func.pop_comments() 5833 5834 if not self._match(TokenType.L_PAREN): 5835 return self.expression( 5836 exp.Window, 5837 comments=comments, 5838 this=this, 5839 alias=self._parse_id_var(False), 5840 over=over, 5841 ) 5842 5843 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5844 5845 first = self._match(TokenType.FIRST) 5846 if self._match_text_seq("LAST"): 5847 first = False 5848 5849 partition, order = self._parse_partition_and_order() 5850 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5851 5852 if kind: 5853 self._match(TokenType.BETWEEN) 5854 start = self._parse_window_spec() 5855 self._match(TokenType.AND) 5856 end = self._parse_window_spec() 5857 5858 spec = self.expression( 5859 exp.WindowSpec, 5860 kind=kind, 5861 start=start["value"], 5862 start_side=start["side"], 5863 end=end["value"], 5864 end_side=end["side"], 5865 ) 5866 else: 5867 spec = None 5868 5869 self._match_r_paren() 5870 5871 window = self.expression( 5872 exp.Window, 5873 comments=comments, 5874 this=this, 5875 partition_by=partition, 5876 order=order, 5877 spec=spec, 5878 alias=window_alias, 5879 over=over, 5880 first=first, 5881 ) 5882 5883 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
5884 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5885 return self._parse_window(window, alias=alias) 5886 5887 return window 5888 5889 def _parse_partition_and_order( 5890 self, 5891 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5892 return self._parse_partition_by(), self._parse_order() 5893 5894 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5895 self._match(TokenType.BETWEEN) 5896 5897 return { 5898 "value": ( 5899 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5900 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5901 or self._parse_bitwise() 5902 ), 5903 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5904 } 5905 5906 def _parse_alias( 5907 self, this: t.Optional[exp.Expression], explicit: bool = False 5908 ) -> t.Optional[exp.Expression]: 5909 any_token = self._match(TokenType.ALIAS) 5910 comments = self._prev_comments or [] 5911 5912 if explicit and not any_token: 5913 return this 5914 5915 if self._match(TokenType.L_PAREN): 5916 aliases = self.expression( 5917 exp.Aliases, 5918 comments=comments, 5919 this=this, 5920 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5921 ) 5922 self._match_r_paren(aliases) 5923 return aliases 5924 5925 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 5926 self.STRING_ALIASES and self._parse_string_as_identifier() 5927 ) 5928 5929 if alias: 5930 comments.extend(alias.pop_comments()) 5931 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5932 column = this.this 5933 5934 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5935 if not this.comments and column and column.comments: 5936 this.comments = column.pop_comments() 5937 5938 return this 5939 5940 def _parse_id_var( 5941 self, 5942 any_token: bool = True, 5943 tokens: t.Optional[t.Collection[TokenType]] = None, 5944 ) -> t.Optional[exp.Expression]: 5945 expression = 
    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier or identifier-like variable token.

        Falls back to consuming any non-reserved token (when `any_token`) or any
        token in `tokens` / ID_VAR_TOKENS, wrapping it in an Identifier node.
        """
        expression = self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            # A string token used as an identifier keeps its quoted status.
            quoted = self._prev.token_type == TokenType.STRING
            expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal via STRING_PARSERS, else try a placeholder."""
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        """Parse a string token as a quoted Identifier (None if no string follows)."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal via NUMERIC_PARSERS, else try a placeholder."""
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier token, else try a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a Var node from a VAR token, any token, or one of `tokens`.

        When `upper` is True the variable text is uppercased.
        """
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        """Consume and return the current token unless it is reserved."""
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]:
        """Parse a string literal, else a Var from any token."""
        return self._parse_string() or self._parse_var(any_token=True, upper=upper)

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression, else a Var from any token."""
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal, else try a placeholder."""
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse a TRUE/FALSE literal, else try a placeholder."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a star (*) token, else try a placeholder."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        """Parse a parameter reference, e.g. the name after an @ or $ marker."""
        this = self._parse_identifier() or self._parse_primary_or_var()
        return self.expression(exp.Parameter, this=this)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Parse a placeholder token via PLACEHOLDER_PARSERS.

        If the registered parser consumes the token but produces nothing, the
        token is put back (retreat by one) and None is returned.
        """
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        """Parse a star modifier such as EXCEPT/REPLACE, wrapped or bare.

        Returns the parsed expression list, or None when `keywords` don't match.
        """
        if not self._match_texts(keywords):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_expression()
        return [expression] if expression else None
    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-separated list of items produced by `parse_method`.

        None results are skipped; comments before a separator are attached to
        the preceding item.
        """
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold a sequence of binary operators from the `expressions` map.

        e.g. a + b - c becomes Sub(Add(a, b), c) for the arithmetic map.
        """
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        """Parse a parenthesized, comma-separated identifier list."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        """Parse a parenthesized `sep`-separated list (parens optional if `optional`)."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside parentheses; raise if they're required but absent."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        """Parse a comma-separated list of expressions."""
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a SELECT statement or a (possibly set-op-combined) expression."""
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_assignment()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT body of a DDL statement (e.g. CREATE TABLE AS)."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """Parse BEGIN/START TRANSACTION with optional transaction modes."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            # A single mode may span multiple VAR tokens, e.g. READ ONLY.
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT/ROLLBACK with optional savepoint and AND [NO] CHAIN."""
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        """Parse REFRESH [TABLE] <name-or-string>."""
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse an ADD [COLUMN] [IF NOT EXISTS] <column def> [FIRST | AFTER col]."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression
    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse a DROP clause inside ALTER TABLE, defaulting the kind to COLUMN."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        """Parse DROP PARTITION (possibly multiple, comma-separated)."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        """Parse the ADD action(s) of an ALTER TABLE: constraints or columns."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            return self._parse_csv(
                lambda: self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            )

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)
        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        """Parse an ALTER action: dialect-specific sub-parsers, else ALTER [COLUMN]."""
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )
        # Fall through: [SET DATA] TYPE <dtype> [COLLATE ...] [USING ...]
        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )

    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        """Parse Redshift's ALTER DISTSTYLE action (ALL/EVEN/AUTO or KEY DISTKEY col)."""
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        """Parse Redshift's ALTER [COMPOUND] SORTKEY action."""
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        """Parse the DROP action(s) of an ALTER TABLE: partitions or columns."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]:
        """Parse RENAME [COLUMN old TO new | TO new_table]."""
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter_table_set(self) -> exp.AlterSet:
        """Parse the SET action of an ALTER TABLE across several dialects
        (Postgres, Snowflake, T-SQL, Hive, Databricks)."""
        alter_set = self.expression(exp.AlterSet)

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_assignment))
        else:
            # Hive-style fallback: [SERDE <name>] followed by properties.
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            alter_set.set("expressions", [self._parse_properties()])

        return alter_set
    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        """Parse an ALTER TABLE statement; fall back to a raw Command when the
        action is unknown or tokens remain unconsumed."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            options = self._parse_csv(self._parse_property)

            # Only build the AST node if everything was consumed successfully.
            if not self._curr and actions:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        """Parse MERGE INTO target USING source ON condition WHEN ... clauses."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        """Parse the WHEN [NOT] MATCHED [BY TARGET|SOURCE] ... THEN clauses of MERGE."""
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source is False for BY TARGET, True for BY SOURCE, False otherwise.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse a SHOW statement via SHOW_PARSERS; fall back to a raw Command."""
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one SET item of the form `name = value` or `name TO value`.

        Returns None (with the cursor restored) when no assignment is found.
        """
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        if isinstance(right, (exp.Column, exp.Identifier)):
            # Bare names on the right-hand side are treated as variables, not columns.
            right = exp.var(right.name)

        this = self.expression(exp.EQ, this=left, expression=right)
        return self.expression(exp.SetItem, this=this, kind=kind)
    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL|SESSION] TRANSACTION <characteristics>."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse one SET item via SET_PARSERS, defaulting to a plain assignment."""
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        """Parse a SET statement; fall back to a Command if tokens remain."""
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        """Parse a (possibly multi-word) option keyword described by `options`.

        `options` maps a leading keyword to its allowed continuations. Raises
        (or returns None when `raise_unmatched` is False) if no continuation
        matches for a keyword that requires one.
        """
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            # No continuation matched; unknown leading keywords also land here.
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume the rest of the statement verbatim into a Command node."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse a ClickHouse-style dictionary property: NAME(KIND(k v, ...))."""
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parse a dictionary range: (MIN x MAX y) or (MAX y), min defaulting to 0."""
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            # NOTE(review): `min`/`max` shadow the builtins within this method.
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        """Parse a comprehension tail: `x IN iterator [IF condition]`.

        Returns None (with the cursor restored) if IN does not follow.
        """
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )
    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        """Parse a heredoc string: a HEREDOC_STRING token or a $tag$...$tag$ span."""
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        # Reached only when raise_error doesn't raise (lenient error levels).
        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Find the longest keyword-sequence match in `trie` and return its parser.

        Restores the cursor and returns None when no full match exists.
        """
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(
        self,
        token_type: TokenType,
        advance: bool = True,
        expression: t.Optional[exp.Expression] = None,
    ) -> t.Optional[bool]:
        """Return True (advancing unless `advance` is False) if the current token
        matches `token_type`; attach pending comments to `expression`."""
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(
        self, types: t.Collection[TokenType], advance: bool = True
    ) -> t.Optional[bool]:
        """Return True if the current token's type is in `types`."""
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(
        self, token_type_a: TokenType, token_type_b: TokenType, advance: bool = True
    ) -> t.Optional[bool]:
        """Return True if the current and next tokens match the given pair."""
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Consume a required ( token, raising on absence."""
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Consume a required ) token, raising on absence."""
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(
        self, texts: t.Collection[str], advance: bool = True
    ) -> t.Optional[bool]:
        """Return True if the current token's uppercased text is in `texts`."""
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts: str, advance: bool = True) -> t.Optional[bool]:
        """Return True if the upcoming tokens spell `texts` in order.

        The cursor is fully restored on a partial match, or when `advance` is False.
        """
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Rewrite columns in a lambda body that refer to the lambda's parameters,
        replacing them with (optionally Cast-wrapped) identifiers or dots."""
        if not node:
            return node

        # Maps each lambda parameter name to its cast target, or False for none.
        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                # Replace the outermost Dot chain that contains the column.
                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        """Parse TRUNCATE [TABLE|DATABASE] with Postgres/ClickHouse extensions;
        fall back to the TRUNCATE() function or a raw Command."""
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        """Parse an ordered expression optionally followed by WITH <operator>."""
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)

    def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized property list: [= ] ( key = value, ... )."""
        self._match(TokenType.EQ)
        self._match(TokenType.L_PAREN)

        opts: t.List[t.Optional[exp.Expression]] = []
        while self._curr and not self._match(TokenType.R_PAREN):
            if self._match_text_seq("FORMAT_NAME", "="):
                # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL,
                # so we parse it separately to use _parse_field()
                prop = self.expression(
                    exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field()
                )
                opts.append(prop)
            else:
                opts.append(self._parse_property())

            self._match(TokenType.COMMA)

        return opts

    def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
        """Parse the option list of a COPY statement (Snowflake/Redshift/T-SQL/etc.)."""
        sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

        options = []
        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
            option = self._parse_var(any_token=True)
            prev = self._prev.text.upper()

            # Different dialects might separate options and values by white space, "=" and "AS"
            self._match(TokenType.EQ)
            self._match(TokenType.ALIAS)

            param = self.expression(exp.CopyParameter, this=option)

            if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
                TokenType.L_PAREN, advance=False
            ):
                # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
                param.set("expressions", self._parse_wrapped_options())
            elif prev == "FILE_FORMAT":
                # T-SQL's external file format case
                param.set("expression", self._parse_field())
            else:
                param.set("expression", self._parse_unquoted_field())

            options.append(param)
            self._match(sep)

        return options

    def _parse_credentials(self) -> t.Optional[exp.Credentials]:
        """Parse COPY credentials: storage integration, credentials, encryption,
        IAM role and region clauses."""
        expr = self.expression(exp.Credentials)

        if self._match_text_seq("STORAGE_INTEGRATION", "="):
            expr.set("storage", self._parse_field())
        if self._match_text_seq("CREDENTIALS"):
            # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
            creds = (
                self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
            )
            expr.set("credentials", creds)
        if self._match_text_seq("ENCRYPTION"):
            expr.set("encryption", self._parse_wrapped_options())
        if self._match_text_seq("IAM_ROLE"):
            expr.set("iam_role", self._parse_field())
        if self._match_text_seq("REGION"):
            expr.set("region", self._parse_field())

        return expr

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        """Parse a single file location argument of a COPY statement."""
        return self._parse_field()

    def _parse_copy(self) -> exp.Copy | exp.Command:
        """Parse a COPY INTO statement; fall back to a Command if tokens remain."""
        start = self._prev

        self._match(TokenType.INTO)

        this = (
            self._parse_select(nested=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table(schema=True)
        )

        # kind is True for COPY INTO <target> FROM ..., False for COPY ... TO ...
        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_file_location)
        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )
def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a ``StarMap`` (single star argument) or a ``VarMap`` from alternating
    key/value arguments, e.g. ``MAP(k1, v1, k2, v2)``."""
    # A lone star argument (e.g. MAP(*)) becomes a StarMap.
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    # Arguments alternate: key, value, key, value, ...
    map_keys = []
    map_values = []
    index = 0
    while index < len(args):
        map_keys.append(args[index])
        map_values.append(args[index + 1])
        index += 2

    return exp.VarMap(
        keys=exp.array(*map_keys, copy=False),
        values=exp.array(*map_values, copy=False),
    )
def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    """Return a parser for a binary range operator (LIKE, GLOB, RLIKE, ...) that
    produces an ``expr_type`` node.

    When ``reverse_args`` is true, the parsed right-hand side becomes ``this`` and
    the left-hand side becomes ``expression`` in the resulting node.
    """

    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        rhs = self._parse_bitwise()
        left, right = (rhs, this) if reverse_args else (this, rhs)
        node = self.expression(expr_type, this=left, expression=right)
        # A trailing ESCAPE clause may wrap the operator (e.g. LIKE ... ESCAPE '\').
        return self._parse_escape(node)

    return _parse_binary_range
def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    """Build a ``Log``/``Ln`` expression from LOG(...) arguments, honoring the
    dialect's argument-order and single-argument conventions."""
    first = seq_get(args, 0)
    second = seq_get(args, 1)

    # Single-argument LOG: some dialects treat it as the natural logarithm.
    if not second:
        return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=first)

    # Two-argument LOG: the default order is (base, value); swap when the
    # dialect puts the value before the base.
    if dialect.LOG_BASE_FIRST:
        return exp.Log(this=first, expression=second)
    return exp.Log(this=second, expression=first)
def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    """Return a builder for JSON-extraction functions whose second argument is a
    JSON path, converted via the dialect's ``to_json_path``."""

    def _builder(args: t.List, dialect: Dialect) -> E:
        json_path = dialect.to_json_path(seq_get(args, 1))
        node = expr_type(this=seq_get(args, 0), expression=json_path)

        # Only JSONExtract accepts extra variadic arguments past the path.
        extra = args[2:]
        if extra and expr_type is exp.JSONExtract:
            node.set("expressions", extra)

        return node

    return _builder
def build_mod(args: t.List) -> exp.Mod:
    """Build a ``Mod`` expression, parenthesizing binary operands so operator
    precedence is preserved when the node is rendered back to SQL."""

    def _paren_if_binary(node: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # MOD(a + 1, 7) must render as (a + 1) % 7, not a + 1 % 7.
        if isinstance(node, exp.Binary):
            return exp.Paren(this=node)
        return node

    return exp.Mod(
        this=_paren_if_binary(seq_get(args, 0)),
        expression=_paren_if_binary(seq_get(args, 1)),
    )
122class Parser(metaclass=_Parser): 123 """ 124 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 125 126 Args: 127 error_level: The desired error level. 128 Default: ErrorLevel.IMMEDIATE 129 error_message_context: The amount of context to capture from a query string when displaying 130 the error message (in number of characters). 131 Default: 100 132 max_errors: Maximum number of error messages to include in a raised ParseError. 133 This is only relevant if error_level is ErrorLevel.RAISE. 134 Default: 3 135 """ 136 137 FUNCTIONS: t.Dict[str, t.Callable] = { 138 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 139 "CONCAT": lambda args, dialect: exp.Concat( 140 expressions=args, 141 safe=not dialect.STRICT_STRING_CONCAT, 142 coalesce=dialect.CONCAT_COALESCE, 143 ), 144 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 145 expressions=args, 146 safe=not dialect.STRICT_STRING_CONCAT, 147 coalesce=dialect.CONCAT_COALESCE, 148 ), 149 "DATE_TO_DATE_STR": lambda args: exp.Cast( 150 this=seq_get(args, 0), 151 to=exp.DataType(this=exp.DataType.Type.TEXT), 152 ), 153 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 154 "HEX": build_hex, 155 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 156 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 157 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 158 "LIKE": build_like, 159 "LOG": build_logarithm, 160 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 161 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 162 "LOWER": build_lower, 163 "MOD": build_mod, 164 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 165 if len(args) != 2 166 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 167 "TIME_TO_TIME_STR": lambda args: 
exp.Cast( 168 this=seq_get(args, 0), 169 to=exp.DataType(this=exp.DataType.Type.TEXT), 170 ), 171 "TO_HEX": build_hex, 172 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 173 this=exp.Cast( 174 this=seq_get(args, 0), 175 to=exp.DataType(this=exp.DataType.Type.TEXT), 176 ), 177 start=exp.Literal.number(1), 178 length=exp.Literal.number(10), 179 ), 180 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 181 "UPPER": build_upper, 182 "VAR_MAP": build_var_map, 183 } 184 185 NO_PAREN_FUNCTIONS = { 186 TokenType.CURRENT_DATE: exp.CurrentDate, 187 TokenType.CURRENT_DATETIME: exp.CurrentDate, 188 TokenType.CURRENT_TIME: exp.CurrentTime, 189 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 190 TokenType.CURRENT_USER: exp.CurrentUser, 191 } 192 193 STRUCT_TYPE_TOKENS = { 194 TokenType.NESTED, 195 TokenType.OBJECT, 196 TokenType.STRUCT, 197 } 198 199 NESTED_TYPE_TOKENS = { 200 TokenType.ARRAY, 201 TokenType.LIST, 202 TokenType.LOWCARDINALITY, 203 TokenType.MAP, 204 TokenType.NULLABLE, 205 *STRUCT_TYPE_TOKENS, 206 } 207 208 ENUM_TYPE_TOKENS = { 209 TokenType.ENUM, 210 TokenType.ENUM8, 211 TokenType.ENUM16, 212 } 213 214 AGGREGATE_TYPE_TOKENS = { 215 TokenType.AGGREGATEFUNCTION, 216 TokenType.SIMPLEAGGREGATEFUNCTION, 217 } 218 219 TYPE_TOKENS = { 220 TokenType.BIT, 221 TokenType.BOOLEAN, 222 TokenType.TINYINT, 223 TokenType.UTINYINT, 224 TokenType.SMALLINT, 225 TokenType.USMALLINT, 226 TokenType.INT, 227 TokenType.UINT, 228 TokenType.BIGINT, 229 TokenType.UBIGINT, 230 TokenType.INT128, 231 TokenType.UINT128, 232 TokenType.INT256, 233 TokenType.UINT256, 234 TokenType.MEDIUMINT, 235 TokenType.UMEDIUMINT, 236 TokenType.FIXEDSTRING, 237 TokenType.FLOAT, 238 TokenType.DOUBLE, 239 TokenType.CHAR, 240 TokenType.NCHAR, 241 TokenType.VARCHAR, 242 TokenType.NVARCHAR, 243 TokenType.BPCHAR, 244 TokenType.TEXT, 245 TokenType.MEDIUMTEXT, 246 TokenType.LONGTEXT, 247 TokenType.MEDIUMBLOB, 248 TokenType.LONGBLOB, 249 TokenType.BINARY, 250 TokenType.VARBINARY, 251 
TokenType.JSON, 252 TokenType.JSONB, 253 TokenType.INTERVAL, 254 TokenType.TINYBLOB, 255 TokenType.TINYTEXT, 256 TokenType.TIME, 257 TokenType.TIMETZ, 258 TokenType.TIMESTAMP, 259 TokenType.TIMESTAMP_S, 260 TokenType.TIMESTAMP_MS, 261 TokenType.TIMESTAMP_NS, 262 TokenType.TIMESTAMPTZ, 263 TokenType.TIMESTAMPLTZ, 264 TokenType.TIMESTAMPNTZ, 265 TokenType.DATETIME, 266 TokenType.DATETIME64, 267 TokenType.DATE, 268 TokenType.DATE32, 269 TokenType.INT4RANGE, 270 TokenType.INT4MULTIRANGE, 271 TokenType.INT8RANGE, 272 TokenType.INT8MULTIRANGE, 273 TokenType.NUMRANGE, 274 TokenType.NUMMULTIRANGE, 275 TokenType.TSRANGE, 276 TokenType.TSMULTIRANGE, 277 TokenType.TSTZRANGE, 278 TokenType.TSTZMULTIRANGE, 279 TokenType.DATERANGE, 280 TokenType.DATEMULTIRANGE, 281 TokenType.DECIMAL, 282 TokenType.UDECIMAL, 283 TokenType.BIGDECIMAL, 284 TokenType.UUID, 285 TokenType.GEOGRAPHY, 286 TokenType.GEOMETRY, 287 TokenType.HLLSKETCH, 288 TokenType.HSTORE, 289 TokenType.PSEUDO_TYPE, 290 TokenType.SUPER, 291 TokenType.SERIAL, 292 TokenType.SMALLSERIAL, 293 TokenType.BIGSERIAL, 294 TokenType.XML, 295 TokenType.YEAR, 296 TokenType.UNIQUEIDENTIFIER, 297 TokenType.USERDEFINED, 298 TokenType.MONEY, 299 TokenType.SMALLMONEY, 300 TokenType.ROWVERSION, 301 TokenType.IMAGE, 302 TokenType.VARIANT, 303 TokenType.VECTOR, 304 TokenType.OBJECT, 305 TokenType.OBJECT_IDENTIFIER, 306 TokenType.INET, 307 TokenType.IPADDRESS, 308 TokenType.IPPREFIX, 309 TokenType.IPV4, 310 TokenType.IPV6, 311 TokenType.UNKNOWN, 312 TokenType.NULL, 313 TokenType.NAME, 314 TokenType.TDIGEST, 315 *ENUM_TYPE_TOKENS, 316 *NESTED_TYPE_TOKENS, 317 *AGGREGATE_TYPE_TOKENS, 318 } 319 320 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 321 TokenType.BIGINT: TokenType.UBIGINT, 322 TokenType.INT: TokenType.UINT, 323 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 324 TokenType.SMALLINT: TokenType.USMALLINT, 325 TokenType.TINYINT: TokenType.UTINYINT, 326 TokenType.DECIMAL: TokenType.UDECIMAL, 327 } 328 329 SUBQUERY_PREDICATES = { 330 TokenType.ANY: exp.Any, 
331 TokenType.ALL: exp.All, 332 TokenType.EXISTS: exp.Exists, 333 TokenType.SOME: exp.Any, 334 } 335 336 RESERVED_TOKENS = { 337 *Tokenizer.SINGLE_TOKENS.values(), 338 TokenType.SELECT, 339 } - {TokenType.IDENTIFIER} 340 341 DB_CREATABLES = { 342 TokenType.DATABASE, 343 TokenType.DICTIONARY, 344 TokenType.MODEL, 345 TokenType.SCHEMA, 346 TokenType.SEQUENCE, 347 TokenType.STORAGE_INTEGRATION, 348 TokenType.TABLE, 349 TokenType.TAG, 350 TokenType.VIEW, 351 TokenType.WAREHOUSE, 352 TokenType.STREAMLIT, 353 } 354 355 CREATABLES = { 356 TokenType.COLUMN, 357 TokenType.CONSTRAINT, 358 TokenType.FOREIGN_KEY, 359 TokenType.FUNCTION, 360 TokenType.INDEX, 361 TokenType.PROCEDURE, 362 *DB_CREATABLES, 363 } 364 365 # Tokens that can represent identifiers 366 ID_VAR_TOKENS = { 367 TokenType.VAR, 368 TokenType.ANTI, 369 TokenType.APPLY, 370 TokenType.ASC, 371 TokenType.ASOF, 372 TokenType.AUTO_INCREMENT, 373 TokenType.BEGIN, 374 TokenType.BPCHAR, 375 TokenType.CACHE, 376 TokenType.CASE, 377 TokenType.COLLATE, 378 TokenType.COMMAND, 379 TokenType.COMMENT, 380 TokenType.COMMIT, 381 TokenType.CONSTRAINT, 382 TokenType.COPY, 383 TokenType.DEFAULT, 384 TokenType.DELETE, 385 TokenType.DESC, 386 TokenType.DESCRIBE, 387 TokenType.DICTIONARY, 388 TokenType.DIV, 389 TokenType.END, 390 TokenType.EXECUTE, 391 TokenType.ESCAPE, 392 TokenType.FALSE, 393 TokenType.FIRST, 394 TokenType.FILTER, 395 TokenType.FINAL, 396 TokenType.FORMAT, 397 TokenType.FULL, 398 TokenType.IDENTIFIER, 399 TokenType.IS, 400 TokenType.ISNULL, 401 TokenType.INTERVAL, 402 TokenType.KEEP, 403 TokenType.KILL, 404 TokenType.LEFT, 405 TokenType.LOAD, 406 TokenType.MERGE, 407 TokenType.NATURAL, 408 TokenType.NEXT, 409 TokenType.OFFSET, 410 TokenType.OPERATOR, 411 TokenType.ORDINALITY, 412 TokenType.OVERLAPS, 413 TokenType.OVERWRITE, 414 TokenType.PARTITION, 415 TokenType.PERCENT, 416 TokenType.PIVOT, 417 TokenType.PRAGMA, 418 TokenType.RANGE, 419 TokenType.RECURSIVE, 420 TokenType.REFERENCES, 421 TokenType.REFRESH, 422 
TokenType.REPLACE, 423 TokenType.RIGHT, 424 TokenType.ROLLUP, 425 TokenType.ROW, 426 TokenType.ROWS, 427 TokenType.SEMI, 428 TokenType.SET, 429 TokenType.SETTINGS, 430 TokenType.SHOW, 431 TokenType.TEMPORARY, 432 TokenType.TOP, 433 TokenType.TRUE, 434 TokenType.TRUNCATE, 435 TokenType.UNIQUE, 436 TokenType.UNNEST, 437 TokenType.UNPIVOT, 438 TokenType.UPDATE, 439 TokenType.USE, 440 TokenType.VOLATILE, 441 TokenType.WINDOW, 442 *CREATABLES, 443 *SUBQUERY_PREDICATES, 444 *TYPE_TOKENS, 445 *NO_PAREN_FUNCTIONS, 446 } 447 448 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 449 450 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 451 TokenType.ANTI, 452 TokenType.APPLY, 453 TokenType.ASOF, 454 TokenType.FULL, 455 TokenType.LEFT, 456 TokenType.LOCK, 457 TokenType.NATURAL, 458 TokenType.OFFSET, 459 TokenType.RIGHT, 460 TokenType.SEMI, 461 TokenType.WINDOW, 462 } 463 464 ALIAS_TOKENS = ID_VAR_TOKENS 465 466 ARRAY_CONSTRUCTORS = { 467 "ARRAY": exp.Array, 468 "LIST": exp.List, 469 } 470 471 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 472 473 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 474 475 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 476 477 FUNC_TOKENS = { 478 TokenType.COLLATE, 479 TokenType.COMMAND, 480 TokenType.CURRENT_DATE, 481 TokenType.CURRENT_DATETIME, 482 TokenType.CURRENT_TIMESTAMP, 483 TokenType.CURRENT_TIME, 484 TokenType.CURRENT_USER, 485 TokenType.FILTER, 486 TokenType.FIRST, 487 TokenType.FORMAT, 488 TokenType.GLOB, 489 TokenType.IDENTIFIER, 490 TokenType.INDEX, 491 TokenType.ISNULL, 492 TokenType.ILIKE, 493 TokenType.INSERT, 494 TokenType.LIKE, 495 TokenType.MERGE, 496 TokenType.OFFSET, 497 TokenType.PRIMARY_KEY, 498 TokenType.RANGE, 499 TokenType.REPLACE, 500 TokenType.RLIKE, 501 TokenType.ROW, 502 TokenType.UNNEST, 503 TokenType.VAR, 504 TokenType.LEFT, 505 TokenType.RIGHT, 506 TokenType.SEQUENCE, 507 TokenType.DATE, 508 TokenType.DATETIME, 509 TokenType.TABLE, 510 TokenType.TIMESTAMP, 511 TokenType.TIMESTAMPTZ, 512 
TokenType.TRUNCATE, 513 TokenType.WINDOW, 514 TokenType.XOR, 515 *TYPE_TOKENS, 516 *SUBQUERY_PREDICATES, 517 } 518 519 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 520 TokenType.AND: exp.And, 521 } 522 523 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 524 TokenType.COLON_EQ: exp.PropertyEQ, 525 } 526 527 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 528 TokenType.OR: exp.Or, 529 } 530 531 EQUALITY = { 532 TokenType.EQ: exp.EQ, 533 TokenType.NEQ: exp.NEQ, 534 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 535 } 536 537 COMPARISON = { 538 TokenType.GT: exp.GT, 539 TokenType.GTE: exp.GTE, 540 TokenType.LT: exp.LT, 541 TokenType.LTE: exp.LTE, 542 } 543 544 BITWISE = { 545 TokenType.AMP: exp.BitwiseAnd, 546 TokenType.CARET: exp.BitwiseXor, 547 TokenType.PIPE: exp.BitwiseOr, 548 } 549 550 TERM = { 551 TokenType.DASH: exp.Sub, 552 TokenType.PLUS: exp.Add, 553 TokenType.MOD: exp.Mod, 554 TokenType.COLLATE: exp.Collate, 555 } 556 557 FACTOR = { 558 TokenType.DIV: exp.IntDiv, 559 TokenType.LR_ARROW: exp.Distance, 560 TokenType.SLASH: exp.Div, 561 TokenType.STAR: exp.Mul, 562 } 563 564 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 565 566 TIMES = { 567 TokenType.TIME, 568 TokenType.TIMETZ, 569 } 570 571 TIMESTAMPS = { 572 TokenType.TIMESTAMP, 573 TokenType.TIMESTAMPTZ, 574 TokenType.TIMESTAMPLTZ, 575 *TIMES, 576 } 577 578 SET_OPERATIONS = { 579 TokenType.UNION, 580 TokenType.INTERSECT, 581 TokenType.EXCEPT, 582 } 583 584 JOIN_METHODS = { 585 TokenType.ASOF, 586 TokenType.NATURAL, 587 TokenType.POSITIONAL, 588 } 589 590 JOIN_SIDES = { 591 TokenType.LEFT, 592 TokenType.RIGHT, 593 TokenType.FULL, 594 } 595 596 JOIN_KINDS = { 597 TokenType.ANTI, 598 TokenType.CROSS, 599 TokenType.INNER, 600 TokenType.OUTER, 601 TokenType.SEMI, 602 TokenType.STRAIGHT_JOIN, 603 } 604 605 JOIN_HINTS: t.Set[str] = set() 606 607 LAMBDAS = { 608 TokenType.ARROW: lambda self, expressions: self.expression( 609 exp.Lambda, 610 this=self._replace_lambda( 611 
self._parse_assignment(), 612 expressions, 613 ), 614 expressions=expressions, 615 ), 616 TokenType.FARROW: lambda self, expressions: self.expression( 617 exp.Kwarg, 618 this=exp.var(expressions[0].name), 619 expression=self._parse_assignment(), 620 ), 621 } 622 623 COLUMN_OPERATORS = { 624 TokenType.DOT: None, 625 TokenType.DCOLON: lambda self, this, to: self.expression( 626 exp.Cast if self.STRICT_CAST else exp.TryCast, 627 this=this, 628 to=to, 629 ), 630 TokenType.ARROW: lambda self, this, path: self.expression( 631 exp.JSONExtract, 632 this=this, 633 expression=self.dialect.to_json_path(path), 634 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 635 ), 636 TokenType.DARROW: lambda self, this, path: self.expression( 637 exp.JSONExtractScalar, 638 this=this, 639 expression=self.dialect.to_json_path(path), 640 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 641 ), 642 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 643 exp.JSONBExtract, 644 this=this, 645 expression=path, 646 ), 647 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 648 exp.JSONBExtractScalar, 649 this=this, 650 expression=path, 651 ), 652 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 653 exp.JSONBContains, 654 this=this, 655 expression=key, 656 ), 657 } 658 659 EXPRESSION_PARSERS = { 660 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 661 exp.Column: lambda self: self._parse_column(), 662 exp.Condition: lambda self: self._parse_assignment(), 663 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 664 exp.Expression: lambda self: self._parse_expression(), 665 exp.From: lambda self: self._parse_from(joins=True), 666 exp.Group: lambda self: self._parse_group(), 667 exp.Having: lambda self: self._parse_having(), 668 exp.Identifier: lambda self: self._parse_id_var(), 669 exp.Join: lambda self: self._parse_join(), 670 exp.Lambda: lambda self: self._parse_lambda(), 671 exp.Lateral: lambda self: 
self._parse_lateral(), 672 exp.Limit: lambda self: self._parse_limit(), 673 exp.Offset: lambda self: self._parse_offset(), 674 exp.Order: lambda self: self._parse_order(), 675 exp.Ordered: lambda self: self._parse_ordered(), 676 exp.Properties: lambda self: self._parse_properties(), 677 exp.Qualify: lambda self: self._parse_qualify(), 678 exp.Returning: lambda self: self._parse_returning(), 679 exp.Select: lambda self: self._parse_select(), 680 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 681 exp.Table: lambda self: self._parse_table_parts(), 682 exp.TableAlias: lambda self: self._parse_table_alias(), 683 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 684 exp.Where: lambda self: self._parse_where(), 685 exp.Window: lambda self: self._parse_named_window(), 686 exp.With: lambda self: self._parse_with(), 687 "JOIN_TYPE": lambda self: self._parse_join_parts(), 688 } 689 690 STATEMENT_PARSERS = { 691 TokenType.ALTER: lambda self: self._parse_alter(), 692 TokenType.BEGIN: lambda self: self._parse_transaction(), 693 TokenType.CACHE: lambda self: self._parse_cache(), 694 TokenType.COMMENT: lambda self: self._parse_comment(), 695 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 696 TokenType.COPY: lambda self: self._parse_copy(), 697 TokenType.CREATE: lambda self: self._parse_create(), 698 TokenType.DELETE: lambda self: self._parse_delete(), 699 TokenType.DESC: lambda self: self._parse_describe(), 700 TokenType.DESCRIBE: lambda self: self._parse_describe(), 701 TokenType.DROP: lambda self: self._parse_drop(), 702 TokenType.INSERT: lambda self: self._parse_insert(), 703 TokenType.KILL: lambda self: self._parse_kill(), 704 TokenType.LOAD: lambda self: self._parse_load(), 705 TokenType.MERGE: lambda self: self._parse_merge(), 706 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 707 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 708 TokenType.REFRESH: lambda self: 
self._parse_refresh(), 709 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 710 TokenType.SET: lambda self: self._parse_set(), 711 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 712 TokenType.UNCACHE: lambda self: self._parse_uncache(), 713 TokenType.UPDATE: lambda self: self._parse_update(), 714 TokenType.USE: lambda self: self.expression( 715 exp.Use, 716 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 717 this=self._parse_table(schema=False), 718 ), 719 TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon), 720 } 721 722 UNARY_PARSERS = { 723 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 724 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 725 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 726 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 727 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 728 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 729 } 730 731 STRING_PARSERS = { 732 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 733 exp.RawString, this=token.text 734 ), 735 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 736 exp.National, this=token.text 737 ), 738 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 739 TokenType.STRING: lambda self, token: self.expression( 740 exp.Literal, this=token.text, is_string=True 741 ), 742 TokenType.UNICODE_STRING: lambda self, token: self.expression( 743 exp.UnicodeString, 744 this=token.text, 745 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 746 ), 747 } 748 749 NUMERIC_PARSERS = { 750 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 751 TokenType.BYTE_STRING: lambda self, token: 
self.expression(exp.ByteString, this=token.text), 752 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 753 TokenType.NUMBER: lambda self, token: self.expression( 754 exp.Literal, this=token.text, is_string=False 755 ), 756 } 757 758 PRIMARY_PARSERS = { 759 **STRING_PARSERS, 760 **NUMERIC_PARSERS, 761 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 762 TokenType.NULL: lambda self, _: self.expression(exp.Null), 763 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 764 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 765 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 766 TokenType.STAR: lambda self, _: self.expression( 767 exp.Star, 768 **{ 769 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 770 "replace": self._parse_star_op("REPLACE"), 771 "rename": self._parse_star_op("RENAME"), 772 }, 773 ), 774 } 775 776 PLACEHOLDER_PARSERS = { 777 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 778 TokenType.PARAMETER: lambda self: self._parse_parameter(), 779 TokenType.COLON: lambda self: ( 780 self.expression(exp.Placeholder, this=self._prev.text) 781 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 782 else None 783 ), 784 } 785 786 RANGE_PARSERS = { 787 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 788 TokenType.GLOB: binary_range_parser(exp.Glob), 789 TokenType.ILIKE: binary_range_parser(exp.ILike), 790 TokenType.IN: lambda self, this: self._parse_in(this), 791 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 792 TokenType.IS: lambda self, this: self._parse_is(this), 793 TokenType.LIKE: binary_range_parser(exp.Like), 794 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 795 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 796 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 797 TokenType.FOR: lambda self, this: 
self._parse_comprehension(this), 798 } 799 800 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 801 "ALLOWED_VALUES": lambda self: self.expression( 802 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 803 ), 804 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 805 "AUTO": lambda self: self._parse_auto_property(), 806 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 807 "BACKUP": lambda self: self.expression( 808 exp.BackupProperty, this=self._parse_var(any_token=True) 809 ), 810 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 811 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 812 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 813 "CHECKSUM": lambda self: self._parse_checksum(), 814 "CLUSTER BY": lambda self: self._parse_cluster(), 815 "CLUSTERED": lambda self: self._parse_clustered_by(), 816 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 817 exp.CollateProperty, **kwargs 818 ), 819 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 820 "CONTAINS": lambda self: self._parse_contains_property(), 821 "COPY": lambda self: self._parse_copy_property(), 822 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 823 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 824 "DEFINER": lambda self: self._parse_definer(), 825 "DETERMINISTIC": lambda self: self.expression( 826 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 827 ), 828 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 829 "DISTKEY": lambda self: self._parse_distkey(), 830 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 831 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 832 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 833 "EXTERNAL": 
lambda self: self.expression(exp.ExternalProperty), 834 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 835 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 836 "FREESPACE": lambda self: self._parse_freespace(), 837 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 838 "HEAP": lambda self: self.expression(exp.HeapProperty), 839 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 840 "IMMUTABLE": lambda self: self.expression( 841 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 842 ), 843 "INHERITS": lambda self: self.expression( 844 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 845 ), 846 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 847 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 848 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 849 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 850 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 851 "LIKE": lambda self: self._parse_create_like(), 852 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 853 "LOCK": lambda self: self._parse_locking(), 854 "LOCKING": lambda self: self._parse_locking(), 855 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 856 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 857 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 858 "MODIFIES": lambda self: self._parse_modifies_property(), 859 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 860 "NO": lambda self: self._parse_no_property(), 861 "ON": lambda self: self._parse_on_property(), 862 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 863 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 864 "PARTITION": lambda self: 
self._parse_partitioned_of(), 865 "PARTITION BY": lambda self: self._parse_partitioned_by(), 866 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 867 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 868 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 869 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 870 "READS": lambda self: self._parse_reads_property(), 871 "REMOTE": lambda self: self._parse_remote_with_connection(), 872 "RETURNS": lambda self: self._parse_returns(), 873 "STRICT": lambda self: self.expression(exp.StrictProperty), 874 "ROW": lambda self: self._parse_row(), 875 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 876 "SAMPLE": lambda self: self.expression( 877 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 878 ), 879 "SECURE": lambda self: self.expression(exp.SecureProperty), 880 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 881 "SETTINGS": lambda self: self.expression( 882 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 883 ), 884 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 885 "SORTKEY": lambda self: self._parse_sortkey(), 886 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 887 "STABLE": lambda self: self.expression( 888 exp.StabilityProperty, this=exp.Literal.string("STABLE") 889 ), 890 "STORED": lambda self: self._parse_stored(), 891 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 892 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 893 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 894 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 895 "TO": lambda self: self._parse_to_table(), 896 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 897 "TRANSFORM": lambda self: self.expression( 898 exp.TransformModelProperty, 
                expressions=self._parse_wrapped_csv(self._parse_expression)
            ),
            "TTL": lambda self: self._parse_ttl(),
            "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
            "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
            "VOLATILE": lambda self: self._parse_volatile_property(),
            "WITH": lambda self: self._parse_with_property(),
        }

        # Maps a column-constraint keyword to a callback that parses the
        # corresponding constraint expression. Keys are matched case-insensitively
        # against upper-cased token text.
        CONSTRAINT_PARSERS = {
            "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
            "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
            "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
            "CHARACTER SET": lambda self: self.expression(
                exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
            ),
            "CHECK": lambda self: self.expression(
                exp.CheckColumnConstraint,
                this=self._parse_wrapped(self._parse_assignment),
                enforced=self._match_text_seq("ENFORCED"),
            ),
            "COLLATE": lambda self: self.expression(
                exp.CollateColumnConstraint, this=self._parse_var(any_token=True)
            ),
            "COMMENT": lambda self: self.expression(
                exp.CommentColumnConstraint, this=self._parse_string()
            ),
            "COMPRESS": lambda self: self._parse_compress(),
            "CLUSTERED": lambda self: self.expression(
                exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
            ),
            "NONCLUSTERED": lambda self: self.expression(
                exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
            ),
            "DEFAULT": lambda self: self.expression(
                exp.DefaultColumnConstraint, this=self._parse_bitwise()
            ),
            "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
            "EPHEMERAL": lambda self: self.expression(
                exp.EphemeralColumnConstraint, this=self._parse_bitwise()
            ),
            "EXCLUDE": lambda self: self.expression(
                exp.ExcludeColumnConstraint, this=self._parse_index_params()
            ),
            "FOREIGN KEY": lambda self: self._parse_foreign_key(),
            "FORMAT": lambda self: self.expression(
                exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
            ),
            "GENERATED": lambda self: self._parse_generated_as_identity(),
            "IDENTITY": lambda self: self._parse_auto_increment(),
            "INLINE": lambda self: self._parse_inline(),
            "LIKE": lambda self: self._parse_create_like(),
            "NOT": lambda self: self._parse_not_constraint(),
            "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
            # ON UPDATE <function> vs a generic ON <id> property
            "ON": lambda self: (
                self._match(TokenType.UPDATE)
                and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
            )
            or self.expression(exp.OnProperty, this=self._parse_id_var()),
            "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
            "PERIOD": lambda self: self._parse_period_for_system_time(),
            "PRIMARY KEY": lambda self: self._parse_primary_key(),
            "REFERENCES": lambda self: self._parse_references(match=False),
            "TITLE": lambda self: self.expression(
                exp.TitleColumnConstraint, this=self._parse_var_or_string()
            ),
            "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
            "UNIQUE": lambda self: self._parse_unique(),
            "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
            "WITH": lambda self: self.expression(
                exp.Properties, expressions=self._parse_wrapped_properties()
            ),
        }

        # Keyword dispatch for ALTER TABLE <action> clauses.
        ALTER_PARSERS = {
            "ADD": lambda self: self._parse_alter_table_add(),
            "ALTER": lambda self: self._parse_alter_table_alter(),
            "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
            "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
            "DROP": lambda self: self._parse_alter_table_drop(),
            "RENAME": lambda self: self._parse_alter_table_rename(),
            "SET": lambda self: self._parse_alter_table_set(),
        }

        # Keyword dispatch for ALTER TABLE ... ALTER <action> (Redshift-style).
        ALTER_ALTER_PARSERS = {
            "DISTKEY": lambda self: self._parse_alter_diststyle(),
            "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
            "SORTKEY": lambda self: self._parse_alter_sortkey(),
            "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
        }

        # Constraint keywords that may appear in a schema without a constraint name.
        SCHEMA_UNNAMED_CONSTRAINTS = {
            "CHECK",
            "EXCLUDE",
            "FOREIGN KEY",
            "LIKE",
            "PERIOD",
            "PRIMARY KEY",
            "UNIQUE",
        }

        # Functions that are parsed without a parenthesized argument list.
        NO_PAREN_FUNCTION_PARSERS = {
            "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
            "CASE": lambda self: self._parse_case(),
            "CONNECT_BY_ROOT": lambda self: self.expression(
                exp.ConnectByRoot, this=self._parse_column()
            ),
            "IF": lambda self: self._parse_if(),
            "NEXT": lambda self: self._parse_next_value_for(),
        }

        # Token types that can never be interpreted as a function name.
        INVALID_FUNC_NAME_TOKENS = {
            TokenType.IDENTIFIER,
            TokenType.STRING,
        }

        # Functions whose arguments may carry aliases (e.g. STRUCT(x AS a)).
        FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

        # Expression classes that represent key/value-style definitions.
        KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

        # Functions that require bespoke parsing of their argument lists.
        FUNCTION_PARSERS = {
            "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
            "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
            "DECODE": lambda self: self._parse_decode(),
            "EXTRACT": lambda self: self._parse_extract(),
            "GAP_FILL": lambda self: self._parse_gap_fill(),
            "JSON_OBJECT": lambda self: self._parse_json_object(),
            "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
            "JSON_TABLE": lambda self: self._parse_json_table(),
            "MATCH": lambda self: self._parse_match_against(),
            "OPENJSON": lambda self: self._parse_open_json(),
            "POSITION": lambda self: self._parse_position(),
            "PREDICT": lambda self: self._parse_predict(),
            "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
            "STRING_AGG": lambda self: self._parse_string_agg(),
            "SUBSTRING": lambda self: self._parse_substring(),
            "TRIM": lambda self: self._parse_trim(),
            "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
            "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
        }

        # Maps a leading token to ("<modifier key>", parsed clause) for SELECT
        # query modifiers (WHERE, GROUP BY, LIMIT, locks, sampling, etc.).
        QUERY_MODIFIER_PARSERS = {
            TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
            TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
            TokenType.WHERE: lambda self: ("where", self._parse_where()),
            TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
            TokenType.HAVING: lambda self: ("having", self._parse_having()),
            TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
            TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
            TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
            TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
            # FETCH is normalized under the same "limit" key as LIMIT
            TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
            TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
            TokenType.FOR: lambda self: ("locks", self._parse_locks()),
            TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
            TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
            TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
            TokenType.CLUSTER_BY: lambda self: (
                "cluster",
                self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
            ),
            TokenType.DISTRIBUTE_BY: lambda self: (
                "distribute",
                self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
            ),
            TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
            TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
            TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
        }

        # Keyword dispatch for the items of a SET statement.
        SET_PARSERS = {
            "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
            "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
            "SESSION": lambda self:
self._parse_set_item_assignment("SESSION"), 1072 "TRANSACTION": lambda self: self._parse_set_transaction(), 1073 } 1074 1075 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1076 1077 TYPE_LITERAL_PARSERS = { 1078 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1079 } 1080 1081 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1082 1083 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1084 1085 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1086 1087 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1088 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1089 "ISOLATION": ( 1090 ("LEVEL", "REPEATABLE", "READ"), 1091 ("LEVEL", "READ", "COMMITTED"), 1092 ("LEVEL", "READ", "UNCOMITTED"), 1093 ("LEVEL", "SERIALIZABLE"), 1094 ), 1095 "READ": ("WRITE", "ONLY"), 1096 } 1097 1098 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1099 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1100 ) 1101 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1102 1103 CREATE_SEQUENCE: OPTIONS_TYPE = { 1104 "SCALE": ("EXTEND", "NOEXTEND"), 1105 "SHARD": ("EXTEND", "NOEXTEND"), 1106 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1107 **dict.fromkeys( 1108 ( 1109 "SESSION", 1110 "GLOBAL", 1111 "KEEP", 1112 "NOKEEP", 1113 "ORDER", 1114 "NOORDER", 1115 "NOCACHE", 1116 "CYCLE", 1117 "NOCYCLE", 1118 "NOMINVALUE", 1119 "NOMAXVALUE", 1120 "NOSCALE", 1121 "NOSHARD", 1122 ), 1123 tuple(), 1124 ), 1125 } 1126 1127 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1128 1129 USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple()) 1130 1131 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1132 1133 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1134 "TYPE": ("EVOLUTION",), 1135 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1136 } 1137 1138 
        # OR <action> alternatives accepted by INSERT.
        INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

        CLONE_KEYWORDS = {"CLONE", "COPY"}
        # Snowflake-style time-travel clause keywords.
        HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
        HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

        OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

        OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

        # MySQL index hints: USE/FORCE/IGNORE INDEX.
        TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

        # T-SQL view attributes (CREATE VIEW ... WITH <attribute>).
        VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

        WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
        WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
        WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

        JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

        FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

        # Tokens that can follow ALTER TABLE ... ADD when adding a constraint.
        ADD_CONSTRAINT_TOKENS = {
            TokenType.CONSTRAINT,
            TokenType.FOREIGN_KEY,
            TokenType.INDEX,
            TokenType.KEY,
            TokenType.PRIMARY_KEY,
            TokenType.UNIQUE,
        }

        DISTINCT_TOKENS = {TokenType.DISTINCT}

        NULL_TOKENS = {TokenType.NULL}

        UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

        SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

        # COPY INTO options whose value is a parenthesized list of properties.
        COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

        # Whether CAST raises on failure (vs TRY_CAST-like null semantics).
        STRICT_CAST = True

        PREFIXED_PIVOT_COLUMNS = False
        IDENTIFY_PIVOT_STRINGS = False

        # Whether single-argument LOG defaults to the natural logarithm.
        LOG_DEFAULTS_TO_LN = False

        # Whether ADD is present for each column added by ALTER TABLE
        ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

        # Whether the table sample clause expects CSV syntax
        TABLESAMPLE_CSV = False

        # The default method used for table sampling
        DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

        # Whether the SET command needs a delimiter (e.g. "=") for assignments
        SET_REQUIRES_ASSIGNMENT_DELIMITER = True

        # Whether the TRIM function expects the characters to trim as its first argument
        TRIM_PATTERN_FIRST = False

        # Whether string aliases are supported `SELECT COUNT(*) 'count'`
        STRING_ALIASES = False

        # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
        MODIFIERS_ATTACHED_TO_SET_OP = True
        SET_OP_MODIFIERS = {"order", "limit", "offset"}

        # Whether to parse IF statements that aren't followed by a left parenthesis as commands
        NO_PAREN_IF_COMMANDS = True

        # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
        JSON_ARROWS_REQUIRE_JSON_TYPE = False

        # Whether the `:` operator is used to extract a value from a VARIANT column
        COLON_IS_VARIANT_EXTRACT = False

        # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
        # If this is True and '(' is not found, the keyword will be treated as an identifier
        VALUES_FOLLOWED_BY_PAREN = True

        # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
        SUPPORTS_IMPLICIT_UNNEST = False

        # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
        INTERVAL_SPANS = True

        # Whether a PARTITION clause can follow a table reference
        SUPPORTS_PARTITION_SELECTION = False

        __slots__ = (
            "error_level",
            "error_message_context",
            "max_errors",
            "dialect",
            "sql",
            "errors",
            "_tokens",
            "_index",
            "_curr",
            "_next",
            "_prev",
            "_prev_comments",
        )

        # Autofilled
        SHOW_TRIE: t.Dict = {}
        SET_TRIE: t.Dict = {}

        def __init__(
            self,
            error_level: t.Optional[ErrorLevel] = None,
            error_message_context: int = 100,
            max_errors: int = 3,
            dialect: DialectType = None,
        ):
            """
            Initialize the parser.

            Args:
                error_level: How to react to parse errors (defaults to IMMEDIATE).
                error_message_context: Number of characters of SQL context shown in error messages.
                max_errors: Maximum number of errors concatenated into a raised ParseError.
                dialect: The SQL dialect (name, class or instance) to parse with.
            """
            # Imported here to avoid a circular import with sqlglot.dialects
            from sqlglot.dialects import Dialect

            self.error_level = error_level or ErrorLevel.IMMEDIATE
            self.error_message_context = error_message_context
            self.max_errors = max_errors
            self.dialect = Dialect.get_or_raise(dialect)
            self.reset()

        def reset(self):
            """Reset all mutable parsing state so this instance can be reused."""
            self.sql = ""
            self.errors = []
            self._tokens = []
            self._index = 0
            self._curr = None
            self._next = None
            self._prev = None
            self._prev_comments = None

        def parse(
            self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
        ) -> t.List[t.Optional[exp.Expression]]:
            """
            Parses a list of tokens and returns a list of syntax trees, one tree
            per parsed SQL statement.

            Args:
                raw_tokens: The list of tokens.
                sql: The original SQL string, used to produce helpful debug messages.

            Returns:
                The list of the produced syntax trees.
            """
            return self._parse(
                parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
            )

        def parse_into(
            self,
            expression_types: exp.IntoType,
            raw_tokens: t.List[Token],
            sql: t.Optional[str] = None,
        ) -> t.List[t.Optional[exp.Expression]]:
            """
            Parses a list of tokens into a given Expression type. If a collection of Expression
            types is given instead, this method will try to parse the token list into each one
            of them, stopping at the first for which the parsing succeeds.

            Args:
                expression_types: The expression type(s) to try and parse the token list into.
                raw_tokens: The list of tokens.
                sql: The original SQL string, used to produce helpful debug messages.

            Returns:
                The target Expression.

            Raises:
                TypeError: If no parser is registered for a requested expression type.
                ParseError: If none of the expression types could be parsed.
            """
            errors = []
            for expression_type in ensure_list(expression_types):
                parser = self.EXPRESSION_PARSERS.get(expression_type)
                if not parser:
                    raise TypeError(f"No parser registered for {expression_type}")

                try:
                    return self._parse(parser, raw_tokens, sql)
                except ParseError as e:
                    e.errors[0]["into_expression"] = expression_type
                    errors.append(e)

            raise ParseError(
                f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
                errors=merge_errors(errors),
            ) from errors[-1]

        def _parse(
            self,
            parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
            raw_tokens: t.List[Token],
            sql: t.Optional[str] = None,
        ) -> t.List[t.Optional[exp.Expression]]:
            """Split the token stream into per-statement chunks (on semicolons) and
            run `parse_method` over each chunk, collecting one tree per statement."""
            self.reset()
            self.sql = sql or ""

            total = len(raw_tokens)
            chunks: t.List[t.List[Token]] = [[]]

            for i, token in enumerate(raw_tokens):
                if token.token_type == TokenType.SEMICOLON:
                    if token.comments:
                        # Keep a semicolon that carries comments so they aren't lost
                        chunks.append([token])

                    if i < total - 1:
                        chunks.append([])
                else:
                    chunks[-1].append(token)

            expressions = []

            for tokens in chunks:
                self._index = -1
                self._tokens = tokens
                self._advance()

                expressions.append(parse_method(self))

                # Leftover tokens mean the statement wasn't fully consumed
                if self._index < len(self._tokens):
                    self.raise_error("Invalid expression / Unexpected token")

                self.check_errors()

            return expressions

        def check_errors(self) -> None:
            """Logs or raises any found errors, depending on the chosen error level setting."""
            if self.error_level == ErrorLevel.WARN:
                for error in self.errors:
                    logger.error(str(error))
            elif self.error_level == ErrorLevel.RAISE and self.errors:
                raise ParseError(
                    concat_messages(self.errors, self.max_errors),
                    errors=merge_errors(self.errors),
                )

        def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
            """
            Appends an error in the list of recorded errors or raises it, depending on the chosen
            error level setting.
            """
            token = token or self._curr or self._prev or Token.string("")
            start = token.start
            end = token.end + 1
            start_context = self.sql[max(start - self.error_message_context, 0) : start]
            highlight = self.sql[start:end]
            end_context = self.sql[end : end + self.error_message_context]

            # The offending span is underlined via ANSI escape codes
            error = ParseError.new(
                f"{message}. Line {token.line}, Col: {token.col}.\n"
                f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
                description=message,
                line=token.line,
                col=token.col,
                start_context=start_context,
                highlight=highlight,
                end_context=end_context,
            )

            if self.error_level == ErrorLevel.IMMEDIATE:
                raise error

            self.errors.append(error)

        def expression(
            self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
        ) -> E:
            """
            Creates a new, validated Expression.

            Args:
                exp_class: The expression class to instantiate.
                comments: An optional list of comments to attach to the expression.
                kwargs: The arguments to set for the expression along with their respective values.

            Returns:
                The target expression.
            """
            instance = exp_class(**kwargs)
            # Attach either the explicitly given comments or any buffered token comments
            instance.add_comments(comments) if comments else self._add_comments(instance)
            return self.validate_expression(instance)

        def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
            # Attach comments buffered from the previously consumed token, then clear them
            if expression and self._prev_comments:
                expression.add_comments(self._prev_comments)
                self._prev_comments = None

        def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
            """
            Validates an Expression, making sure that all its mandatory arguments are set.

            Args:
                expression: The expression to validate.
                args: An optional list of items that was used to instantiate the expression, if it's a Func.

            Returns:
                The validated expression.
            """
            if self.error_level != ErrorLevel.IGNORE:
                for error_message in expression.error_messages(args):
                    self.raise_error(error_message)

            return expression

        def _find_sql(self, start: Token, end: Token) -> str:
            # Slice of the original SQL text spanned by the given start and end tokens
            return self.sql[start.start : end.end + 1]

        def _is_connected(self) -> bool:
            # True when the previous and current tokens are adjacent in the SQL text
            return self._prev and self._curr and self._prev.end + 1 == self._curr.start

        def _advance(self, times: int = 1) -> None:
            # Move the cursor and refresh the _curr/_next/_prev token views
            self._index += times
            self._curr = seq_get(self._tokens, self._index)
            self._next = seq_get(self._tokens, self._index + 1)

            if self._index > 0:
                self._prev = self._tokens[self._index - 1]
                self._prev_comments = self._prev.comments
            else:
                self._prev = None
                self._prev_comments = None

        def _retreat(self, index: int) -> None:
            # Move the cursor back to an earlier index (used when backtracking)
            if index != self._index:
                self._advance(index - self._index)

        def _warn_unsupported(self) -> None:
            # Warn once per chunk when falling back to Command parsing
            if len(self._tokens) <= 1:
                return

            # We use _find_sql because self.sql may comprise multiple chunks, and we're only
            # interested in emitting a warning for the one being currently processed.
            sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

            logger.warning(
                f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
            )

        def _parse_command(self) -> exp.Command:
            # Fallback: wrap the remainder of the statement in a generic Command node
            self._warn_unsupported()
            return self.expression(
                exp.Command, this=self._prev.text.upper(), expression=self._parse_string()
            )

        def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
            """
            Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
            This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
            solve this by setting & resetting the parser state accordingly
            """
            index = self._index
            error_level = self.error_level

            self.error_level = ErrorLevel.IMMEDIATE
            try:
                this = parse_method()
            except ParseError:
                this = None
            finally:
                if not this or retreat:
                    self._retreat(index)
                self.error_level = error_level

            return this

        def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
            # COMMENT ON <kind> <name> IS <string>
            start = self._prev
            exists = self._parse_exists() if allow_exists else None

            self._match(TokenType.ON)

            materialized = self._match_text_seq("MATERIALIZED")
            kind = self._match_set(self.CREATABLES) and self._prev
            if not kind:
                return self._parse_as_command(start)

            if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
                this = self._parse_user_defined_function(kind=kind.token_type)
            elif kind.token_type == TokenType.TABLE:
                this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
            elif kind.token_type == TokenType.COLUMN:
                this = self._parse_column()
            else:
                this = self._parse_id_var()

            self._match(TokenType.IS)

            return self.expression(
                exp.Comment,
                this=this,
                kind=kind.text,
                expression=self._parse_string(),
                exists=exists,
                materialized=materialized,
            )

        def _parse_to_table(
            self,
        ) -> exp.ToTableProperty:
            table = self._parse_table_parts(schema=True)
            return self.expression(exp.ToTableProperty, this=table)

        # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
        def _parse_ttl(self) -> exp.Expression:
            def _parse_ttl_action() -> t.Optional[exp.Expression]:
                # One TTL entry: <expr> [DELETE | RECOMPRESS ... | TO DISK ... | TO VOLUME ...]
                this = self._parse_bitwise()

                if self._match_text_seq("DELETE"):
                    return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
                if self._match_text_seq("RECOMPRESS"):
                    return self.expression(
                        exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                    )
                if self._match_text_seq("TO", "DISK"):
                    return self.expression(
                        exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                    )
                if self._match_text_seq("TO", "VOLUME"):
                    return self.expression(
                        exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                    )

                return this

            expressions = self._parse_csv(_parse_ttl_action)
            where = self._parse_where()
            group = self._parse_group()

            aggregates = None
            if group and self._match(TokenType.SET):
                aggregates = self._parse_csv(self._parse_set_item)

            return self.expression(
                exp.MergeTreeTTL,
                expressions=expressions,
                where=where,
                group=group,
                aggregates=aggregates,
            )

        def _parse_statement(self) -> t.Optional[exp.Expression]:
            if self._curr is None:
                return None

            # Dispatch on the leading token when a dedicated statement parser exists
            if self._match_set(self.STATEMENT_PARSERS):
                return self.STATEMENT_PARSERS[self._prev.token_type](self)

            if self._match_set(self.dialect.tokenizer.COMMANDS):
                return self._parse_command()
            # Otherwise treat the statement as a bare expression or a SELECT query
            expression = self._parse_expression()
            expression = self._parse_set_operations(expression) if expression else self._parse_select()
            return self._parse_query_modifiers(expression)

        def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
            # DROP [TEMPORARY] [MATERIALIZED] <kind> [IF EXISTS] <name> ...
            start = self._prev
            temporary = self._match(TokenType.TEMPORARY)
            materialized = self._match_text_seq("MATERIALIZED")

            kind = self._match_set(self.CREATABLES) and self._prev.text
            if not kind:
                return self._parse_as_command(start)

            if_exists = exists or self._parse_exists()
            table = self._parse_table_parts(
                schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
            )

            cluster = self._parse_on_property() if self._match(TokenType.ON) else None

            if self._match(TokenType.L_PAREN, advance=False):
                expressions = self._parse_wrapped_csv(self._parse_types)
            else:
                expressions = None

            return self.expression(
                exp.Drop,
                comments=start.comments,
                exists=if_exists,
                this=table,
                expressions=expressions,
                kind=kind.upper(),
                temporary=temporary,
                materialized=materialized,
                cascade=self._match_text_seq("CASCADE"),
                constraints=self._match_text_seq("CONSTRAINTS"),
                purge=self._match_text_seq("PURGE"),
                cluster=cluster,
            )

        def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
            # Matches IF [NOT] EXISTS; truthy only when the full sequence matched
            return (
                self._match_text_seq("IF")
                and (not not_ or self._match(TokenType.NOT))
                and self._match(TokenType.EXISTS)
            )

        def _parse_create(self) -> exp.Create | exp.Command:
            """Parse a CREATE statement, falling back to a generic Command when the
            created object kind or trailing syntax isn't recognized."""
            # Note: this can't be None because we've matched a statement parser
            start = self._prev
            comments = self._prev_comments

            replace = (
                start.token_type == TokenType.REPLACE
                or self._match_pair(TokenType.OR, TokenType.REPLACE)
                or self._match_pair(TokenType.OR, TokenType.ALTER)
            )

            unique = self._match(TokenType.UNIQUE)

            if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
                # CREATE TABLE FUNCTION: skip the TABLE token
                self._advance()

            properties = None
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not create_token:
                # exp.Properties.Location.POST_CREATE
                properties = self._parse_properties()
                create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

            exists = self._parse_exists(not_=True)
            this = None
            expression: t.Optional[exp.Expression] = None
            indexes = None
            no_schema_binding = None
            begin = None
            end = None
            clone = None

            def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
                # Accumulate properties parsed at different clause positions
                nonlocal properties
                if properties and temp_props:
                    properties.expressions.extend(temp_props.expressions)
                elif temp_props:
                    properties = temp_props

            if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
                this = self._parse_user_defined_function(kind=create_token.token_type)

                # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
                extend_props(self._parse_properties())

                expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
                extend_props(self._parse_properties())

                if not expression:
                    if self._match(TokenType.COMMAND):
                        expression = self._parse_as_command(self._prev)
                    else:
                        begin = self._match(TokenType.BEGIN)
                        return_ = self._match_text_seq("RETURN")

                        if self._match(TokenType.STRING, advance=False):
                            # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                            # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                            expression = self._parse_string()
                            extend_props(self._parse_properties())
                        else:
                            expression = self._parse_statement()

                        end = self._match_text_seq("END")

                        if return_:
                            expression = self.expression(exp.Return, this=expression)
            elif create_token.token_type == TokenType.INDEX:
                # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c)
                if not self._match(TokenType.ON):
                    index = self._parse_id_var()
                    anonymous = False
                else:
                    index = None
                    anonymous = True

                this = self._parse_index(index=index, anonymous=anonymous)
            elif create_token.token_type in self.DB_CREATABLES:
                table_parts = self._parse_table_parts(
                    schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
                )

                # exp.Properties.Location.POST_NAME
                self._match(TokenType.COMMA)
                extend_props(self._parse_properties(before=True))

                this = self._parse_schema(this=table_parts)

                # exp.Properties.Location.POST_SCHEMA and POST_WITH
                extend_props(self._parse_properties())

                self._match(TokenType.ALIAS)
                if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                    # exp.Properties.Location.POST_ALIAS
                    extend_props(self._parse_properties())

                if create_token.token_type == TokenType.SEQUENCE:
                    expression = self._parse_types()
                    extend_props(self._parse_properties())
                else:
                    expression = self._parse_ddl_select()

                if create_token.token_type == TokenType.TABLE:
                    # exp.Properties.Location.POST_EXPRESSION
                    extend_props(self._parse_properties())

                    indexes = []
                    while True:
                        index = self._parse_index()

                        # exp.Properties.Location.POST_INDEX
                        extend_props(self._parse_properties())

                        if not index:
                            break
                        else:
                            self._match(TokenType.COMMA)
                            indexes.append(index)
                elif create_token.token_type == TokenType.VIEW:
                    if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                        no_schema_binding = True

            shallow = self._match_text_seq("SHALLOW")

            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self.expression(
                    exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
                )

            # Unconsumed trailing tokens mean unsupported syntax -> fall back to Command
            if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
                return self._parse_as_command(start)

            return self.expression(
                exp.Create,
                comments=comments,
                this=this,
                kind=create_token.text.upper(),
                replace=replace,
                unique=unique,
                expression=expression,
                exists=exists,
                properties=properties,
                indexes=indexes,
                no_schema_binding=no_schema_binding,
                begin=begin,
                end=end,
                clone=clone,
            )

        def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
            # Parse CREATE SEQUENCE options (INCREMENT BY, MINVALUE, CACHE, ...);
            # returns None when no option was consumed
            seq = exp.SequenceProperties()

            options = []
            index = self._index

            while self._curr:
                self._match(TokenType.COMMA)
                if self._match_text_seq("INCREMENT"):
                    self._match_text_seq("BY")
                    self._match_text_seq("=")
                    seq.set("increment", self._parse_term())
                elif self._match_text_seq("MINVALUE"):
                    seq.set("minvalue", self._parse_term())
                elif self._match_text_seq("MAXVALUE"):
                    seq.set("maxvalue", self._parse_term())
                elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                    self._match_text_seq("=")
                    seq.set("start", self._parse_term())
                elif self._match_text_seq("CACHE"):
                    # T-SQL allows empty CACHE which is initialized dynamically
                    seq.set("cache", self._parse_number() or True)
                elif self._match_text_seq("OWNED", "BY"):
                    # "OWNED BY NONE" is the default
                    seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
                else:
                    opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                    if opt:
                        options.append(opt)
                    else:
                        break

            seq.set("options", options if options else None)
            return None if
self._index == index else seq 1826 1827 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1828 # only used for teradata currently 1829 self._match(TokenType.COMMA) 1830 1831 kwargs = { 1832 "no": self._match_text_seq("NO"), 1833 "dual": self._match_text_seq("DUAL"), 1834 "before": self._match_text_seq("BEFORE"), 1835 "default": self._match_text_seq("DEFAULT"), 1836 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1837 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1838 "after": self._match_text_seq("AFTER"), 1839 "minimum": self._match_texts(("MIN", "MINIMUM")), 1840 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1841 } 1842 1843 if self._match_texts(self.PROPERTY_PARSERS): 1844 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1845 try: 1846 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1847 except TypeError: 1848 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1849 1850 return None 1851 1852 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1853 return self._parse_wrapped_csv(self._parse_property) 1854 1855 def _parse_property(self) -> t.Optional[exp.Expression]: 1856 if self._match_texts(self.PROPERTY_PARSERS): 1857 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1858 1859 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1860 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1861 1862 if self._match_text_seq("COMPOUND", "SORTKEY"): 1863 return self._parse_sortkey(compound=True) 1864 1865 if self._match_text_seq("SQL", "SECURITY"): 1866 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1867 1868 index = self._index 1869 key = self._parse_column() 1870 1871 if not self._match(TokenType.EQ): 1872 self._retreat(index) 1873 return self._parse_sequence_properties() 1874 1875 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 1876 
if isinstance(key, exp.Column): 1877 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 1878 1879 value = self._parse_bitwise() or self._parse_var(any_token=True) 1880 1881 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 1882 if isinstance(value, exp.Column): 1883 value = exp.var(value.name) 1884 1885 return self.expression(exp.Property, this=key, value=value) 1886 1887 def _parse_stored(self) -> exp.FileFormatProperty: 1888 self._match(TokenType.ALIAS) 1889 1890 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1891 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1892 1893 return self.expression( 1894 exp.FileFormatProperty, 1895 this=( 1896 self.expression( 1897 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1898 ) 1899 if input_format or output_format 1900 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1901 ), 1902 ) 1903 1904 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 1905 field = self._parse_field() 1906 if isinstance(field, exp.Identifier) and not field.quoted: 1907 field = exp.var(field) 1908 1909 return field 1910 1911 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1912 self._match(TokenType.EQ) 1913 self._match(TokenType.ALIAS) 1914 1915 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 1916 1917 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1918 properties = [] 1919 while True: 1920 if before: 1921 prop = self._parse_property_before() 1922 else: 1923 prop = self._parse_property() 1924 if not prop: 1925 break 1926 for p in ensure_list(prop): 1927 properties.append(p) 1928 1929 if properties: 1930 return self.expression(exp.Properties, expressions=properties) 1931 1932 return None 1933 1934 def _parse_fallback(self, no: bool = False) -> 
    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        """Disambiguate VOLATILE: a table property vs. a function stability marker.

        Looks two tokens back; if the preceding token is one of
        PRE_VOLATILE_TOKENS, VOLATILE is a table property, otherwise it is
        treated as a stability keyword (as in function DDL).
        """
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_retention_period(self) -> exp.Var:
        # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        unit = self._parse_var(any_token=True)
        return exp.var(f"{number_str}{unit}")

    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        """Parse SYSTEM_VERSIONING = {OFF | ON [(options...)]}.

        Recognized parenthesized options: HISTORY_TABLE, DATA_CONSISTENCY_CHECK
        and HISTORY_RETENTION_PERIOD. `with_` records whether the property
        appeared inside a WITH (...) clause.
        """
        self._match(TokenType.EQ)
        prop = self.expression(
            exp.WithSystemVersioningProperty,
            **{  # type: ignore
                "on": True,
                "with": with_,
            },
        )

        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop
    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        """Parse the clause following a WITH keyword in DDL.

        Dispatches, in order, to: WITH (SYSTEM_VERSIONING ...), a generic wrapped
        property list, JOURNAL, view attributes, [NO] DATA, SERDEPROPERTIES,
        SCHEMA binding, and finally isolated-loading options. Order matters:
        the parenthesized SYSTEM_VERSIONING form must be tried before the
        generic wrapped-property list.
        """
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if self._match(TokenType.SCHEMA):
            return self.expression(
                exp.WithSchemaBindingProperty,
                this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS),
            )

        # Nothing recognizable follows; bail out rather than attempt
        # isolated-loading at end of input.
        if not self._next:
            return None

        return self._parse_withisolatedloading()
self._prev.text) 2044 2045 if not user or not host: 2046 return None 2047 2048 return exp.DefinerProperty(this=f"{user}@{host}") 2049 2050 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2051 self._match(TokenType.TABLE) 2052 self._match(TokenType.EQ) 2053 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2054 2055 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2056 return self.expression(exp.LogProperty, no=no) 2057 2058 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2059 return self.expression(exp.JournalProperty, **kwargs) 2060 2061 def _parse_checksum(self) -> exp.ChecksumProperty: 2062 self._match(TokenType.EQ) 2063 2064 on = None 2065 if self._match(TokenType.ON): 2066 on = True 2067 elif self._match_text_seq("OFF"): 2068 on = False 2069 2070 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2071 2072 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2073 return self.expression( 2074 exp.Cluster, 2075 expressions=( 2076 self._parse_wrapped_csv(self._parse_ordered) 2077 if wrapped 2078 else self._parse_csv(self._parse_ordered) 2079 ), 2080 ) 2081 2082 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2083 self._match_text_seq("BY") 2084 2085 self._match_l_paren() 2086 expressions = self._parse_csv(self._parse_column) 2087 self._match_r_paren() 2088 2089 if self._match_text_seq("SORTED", "BY"): 2090 self._match_l_paren() 2091 sorted_by = self._parse_csv(self._parse_ordered) 2092 self._match_r_paren() 2093 else: 2094 sorted_by = None 2095 2096 self._match(TokenType.INTO) 2097 buckets = self._parse_number() 2098 self._match_text_seq("BUCKETS") 2099 2100 return self.expression( 2101 exp.ClusteredByProperty, 2102 expressions=expressions, 2103 sorted_by=sorted_by, 2104 buckets=buckets, 2105 ) 2106 2107 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2108 if not self._match_text_seq("GRANTS"): 2109 
    def _parse_freespace(self) -> exp.FreespaceProperty:
        """Parse FREESPACE [=] <number> [PERCENT]."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        """Parse MERGEBLOCKRATIO, either `= <number> [PERCENT]` or the bare
        NO/DEFAULT-prefixed form (prefixes arrive via the arguments)."""
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        """Parse DATABLOCKSIZE [= <size> [BYTES|KBYTES|KILOBYTES]].

        The DEFAULT/MIN/MAX prefixes are matched by the caller and passed in.
        """
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        """Parse BLOCKCOMPRESSION = {ALWAYS|MANUAL|NEVER|DEFAULT} [AUTOTEMP(...)]."""
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )
    def _parse_locking(self) -> exp.LockingProperty:
        """Parse a LOCKING clause: kind, optional target, FOR/IN, lock type, OVERRIDE."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # ROW locking has no named target; the others name the locked object.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            # EXCL is normalized to EXCLUSIVE.
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        """Parse PARTITION BY <expr, ...>; returns [] when absent."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_assignment)
        return []
    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        """Parse PARTITION OF <table> {DEFAULT | FOR VALUES <bound spec>}.

        Returns None (after rewinding the PARTITION token) when OF does not
        follow, so other PARTITION forms can be tried.
        """
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)
2299 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2300 ) 2301 2302 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2303 if self._match_text_seq("AND", "STATISTICS"): 2304 statistics = True 2305 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2306 statistics = False 2307 else: 2308 statistics = None 2309 2310 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2311 2312 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2313 if self._match_text_seq("SQL"): 2314 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2315 return None 2316 2317 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2318 if self._match_text_seq("SQL", "DATA"): 2319 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2320 return None 2321 2322 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2323 if self._match_text_seq("PRIMARY", "INDEX"): 2324 return exp.NoPrimaryIndexProperty() 2325 if self._match_text_seq("SQL"): 2326 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2327 return None 2328 2329 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2330 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2331 return exp.OnCommitProperty() 2332 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2333 return exp.OnCommitProperty(delete=True) 2334 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2335 2336 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2337 if self._match_text_seq("SQL", "DATA"): 2338 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2339 return None 2340 2341 def _parse_distkey(self) -> exp.DistKeyProperty: 2342 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2343 2344 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2345 table = 
    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        """Parse SORTKEY(<ids>); `compound` marks the COMPOUND SORTKEY variant."""
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        """Parse CHARACTER SET [=] <name>; `default` marks a DEFAULT prefix."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        """Parse REMOTE WITH CONNECTION <table parts>."""
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a RETURNS clause of function DDL.

        Handles RETURNS TABLE<...>, RETURNS TABLE (schema), the
        NULL ON NULL INPUT form, and a plain type.
        """
        value: t.Optional[exp.Expression]
        null = None
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # TABLE<col type, ...> — angle-bracketed struct-style schema.
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
            value = None
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)
self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2405 if self._match(TokenType.DOT): 2406 style = None 2407 self._retreat(self._index - 2) 2408 this = self._parse_table(schema=True) 2409 properties = self._parse_properties() 2410 expressions = properties.expressions if properties else None 2411 return self.expression( 2412 exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2413 ) 2414 2415 def _parse_insert(self) -> exp.Insert: 2416 comments = ensure_list(self._prev_comments) 2417 hint = self._parse_hint() 2418 overwrite = self._match(TokenType.OVERWRITE) 2419 ignore = self._match(TokenType.IGNORE) 2420 local = self._match_text_seq("LOCAL") 2421 alternative = None 2422 is_function = None 2423 2424 if self._match_text_seq("DIRECTORY"): 2425 this: t.Optional[exp.Expression] = self.expression( 2426 exp.Directory, 2427 this=self._parse_var_or_string(), 2428 local=local, 2429 row_format=self._parse_row_format(match_row=True), 2430 ) 2431 else: 2432 if self._match(TokenType.OR): 2433 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2434 2435 self._match(TokenType.INTO) 2436 comments += ensure_list(self._prev_comments) 2437 self._match(TokenType.TABLE) 2438 is_function = self._match(TokenType.FUNCTION) 2439 2440 this = ( 2441 self._parse_table(schema=True, parse_partition=True) 2442 if not is_function 2443 else self._parse_function() 2444 ) 2445 2446 returning = self._parse_returning() 2447 2448 return self.expression( 2449 exp.Insert, 2450 comments=comments, 2451 hint=hint, 2452 is_function=is_function, 2453 this=this, 2454 stored=self._match_text_seq("STORED") and self._parse_stored(), 2455 by_name=self._match_text_seq("BY", "NAME"), 2456 exists=self._parse_exists(), 2457 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2458 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2459 conflict=self._parse_on_conflict(), 2460 
    def _parse_kill(self) -> exp.Kill:
        """Parse KILL [CONNECTION|QUERY] <id>."""
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse ON CONFLICT ... or ON DUPLICATE KEY ... following an INSERT."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            # ON CONFLICT can name either a constraint or a key-column list.
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        # If the action ended in UPDATE, a SET assignment list follows.
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse RETURNING <exprs> [INTO <target>], or None if absent."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse ROW FORMAT ... (ROW already consumed); None if FORMAT absent."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()
    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse a ROW FORMAT clause: SERDE '<class>' or DELIMITED options.

        When `match_row` is true, the leading ROW FORMAT pair must be present,
        otherwise None is returned.
        """
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = self._parse_serde_properties()

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        # Each DELIMITED sub-option is optional and takes a string argument.
        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore
    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            # DELETE t1, t2 FROM ... — the deleted-table list precedes FROM.
            tables = self._parse_csv(self._parse_table) or None

        # RETURNING may appear before or after the other clauses; try both spots.
        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parse an UPDATE statement: target, SET list, FROM/WHERE/ORDER/LIMIT."""
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        # RETURNING may appear before or after the trailing clauses; try both spots.
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )
    def _parse_cache(self) -> exp.Cache:
        """Parse CACHE [LAZY] TABLE <table> [OPTIONS('k' = 'v')] [AS <select>]."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            # Single key/value pair stored as a two-element list.
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        """Parse PARTITION (<assignments>), or None if PARTITION is absent."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment)
        )
    def _parse_projections(self) -> t.List[exp.Expression]:
        """Parse the SELECT projection list (thin wrapper over _parse_expressions)."""
        return self._parse_expressions()
    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a WITH (CTE) clause into an exp.With node, or None if absent.

        `skip_with_token` indicates the WITH keyword was already consumed.
        """
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # CTEs are normally comma-separated, but a stray repeated WITH is
            # also tolerated as a separator.
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )
    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse an optional table alias, possibly with a column list.

        Returns None when neither an alias name nor a column list is present.
        """
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # If the parenthesized list was empty, it wasn't a column list: rewind.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)

        # We bubble up comments from the Identifier to the TableAlias
        if isinstance(alias, exp.Identifier):
            table_alias.add_comments(alias.pop_comments())

        return table_alias

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap `this` in an exp.Subquery with trailing pivots and optional alias."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )
    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach trailing query modifiers (joins, laterals, WHERE/GROUP/..., etc.)
        to a Query or Table node; other node types are returned unchanged."""
        if isinstance(this, (exp.Query, exp.Table)):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # LIMIT may have swallowed an OFFSET (and LIMIT BY
                            # expressions); rehome them onto a proper Offset node.
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                    continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
            this = self._implicit_unnests_to_explicit(this)

        return this
for hint in iter( 2935 lambda: self._parse_csv( 2936 lambda: self._parse_function() or self._parse_var(upper=True) 2937 ), 2938 [], 2939 ): 2940 hints.extend(hint) 2941 2942 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2943 self.raise_error("Expected */ after HINT") 2944 2945 return self.expression(exp.Hint, expressions=hints) 2946 2947 return None 2948 2949 def _parse_into(self) -> t.Optional[exp.Into]: 2950 if not self._match(TokenType.INTO): 2951 return None 2952 2953 temp = self._match(TokenType.TEMPORARY) 2954 unlogged = self._match_text_seq("UNLOGGED") 2955 self._match(TokenType.TABLE) 2956 2957 return self.expression( 2958 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2959 ) 2960 2961 def _parse_from( 2962 self, joins: bool = False, skip_from_token: bool = False 2963 ) -> t.Optional[exp.From]: 2964 if not skip_from_token and not self._match(TokenType.FROM): 2965 return None 2966 2967 return self.expression( 2968 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2969 ) 2970 2971 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 2972 return self.expression( 2973 exp.MatchRecognizeMeasure, 2974 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 2975 this=self._parse_expression(), 2976 ) 2977 2978 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2979 if not self._match(TokenType.MATCH_RECOGNIZE): 2980 return None 2981 2982 self._match_l_paren() 2983 2984 partition = self._parse_partition_by() 2985 order = self._parse_order() 2986 2987 measures = ( 2988 self._parse_csv(self._parse_match_recognize_measure) 2989 if self._match_text_seq("MEASURES") 2990 else None 2991 ) 2992 2993 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2994 rows = exp.var("ONE ROW PER MATCH") 2995 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2996 text = "ALL ROWS PER MATCH" 2997 if self._match_text_seq("SHOW", "EMPTY", 
"MATCHES"): 2998 text += " SHOW EMPTY MATCHES" 2999 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3000 text += " OMIT EMPTY MATCHES" 3001 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3002 text += " WITH UNMATCHED ROWS" 3003 rows = exp.var(text) 3004 else: 3005 rows = None 3006 3007 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3008 text = "AFTER MATCH SKIP" 3009 if self._match_text_seq("PAST", "LAST", "ROW"): 3010 text += " PAST LAST ROW" 3011 elif self._match_text_seq("TO", "NEXT", "ROW"): 3012 text += " TO NEXT ROW" 3013 elif self._match_text_seq("TO", "FIRST"): 3014 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3015 elif self._match_text_seq("TO", "LAST"): 3016 text += f" TO LAST {self._advance_any().text}" # type: ignore 3017 after = exp.var(text) 3018 else: 3019 after = None 3020 3021 if self._match_text_seq("PATTERN"): 3022 self._match_l_paren() 3023 3024 if not self._curr: 3025 self.raise_error("Expecting )", self._curr) 3026 3027 paren = 1 3028 start = self._curr 3029 3030 while self._curr and paren > 0: 3031 if self._curr.token_type == TokenType.L_PAREN: 3032 paren += 1 3033 if self._curr.token_type == TokenType.R_PAREN: 3034 paren -= 1 3035 3036 end = self._prev 3037 self._advance() 3038 3039 if paren > 0: 3040 self.raise_error("Expecting )", self._curr) 3041 3042 pattern = exp.var(self._find_sql(start, end)) 3043 else: 3044 pattern = None 3045 3046 define = ( 3047 self._parse_csv(self._parse_name_as_expression) 3048 if self._match_text_seq("DEFINE") 3049 else None 3050 ) 3051 3052 self._match_r_paren() 3053 3054 return self.expression( 3055 exp.MatchRecognize, 3056 partition_by=partition, 3057 order=order, 3058 measures=measures, 3059 rows=rows, 3060 after=after, 3061 pattern=pattern, 3062 define=define, 3063 alias=self._parse_table_alias(), 3064 ) 3065 3066 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3067 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3068 if not cross_apply and 
    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Return the (method, side, kind) tokens of a join prefix, each possibly None."""
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse one JOIN clause (including comma joins and CROSS/OUTER APPLY).

        If the join prefix is not followed by an actual JOIN token, the parser
        retreats so those tokens can be re-interpreted by other rules.
        """
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not isinstance(kwargs["this"], exp.Unnest) and not (
            kind and kind.token_type == TokenType.CROSS
        ):
            # Nested joins: try to attach any trailing joins to the joined table,
            # but only keep them if an ON/USING follows; otherwise retreat.
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_assignment()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        """Parse an expression optionally followed by a Postgres operator class name."""
        this = self._parse_assignment()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        """Parse the option tail of a CREATE INDEX statement (USING, columns, INCLUDE,
        PARTITION BY, WITH storage, TABLESPACE, WHERE, ON)."""
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        on = self._parse_field() if self._match(TokenType.ON) else None

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
            on=on,
        )

    def _parse_index(
        self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
    ) -> t.Optional[exp.Index]:
        """Parse an index definition.

        When `index`/`anonymous` is given, the index name is already known and only
        the target table plus parameters are parsed; otherwise the full
        [UNIQUE|PRIMARY|AMP] INDEX <name> form is expected.
        """
        if index or anonymous:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )
    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse T-SQL `WITH (...)` table hints or MySQL index hints; None if absent."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_set((TokenType.INDEX, TokenType.KEY))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dotted component of a table reference (function call disallowed
        in schema context)."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        """Parse a possibly-qualified table name (catalog.db.table, arbitrarily deep
        via Dot nesting) into an exp.Table, hoisting identifier comments."""
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        return self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
            pivots=self._parse_pivots(),
        )

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a full table factor: lateral/unnest/VALUES/subquery or a plain table
        with its modifiers (ONLY, partition, version, alias, hints, pivots, sample,
        joins, WITH ORDINALITY)."""
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            # Some dialects put TABLESAMPLE before the alias
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        """Parse temporal-table versioning: FOR TIMESTAMP/VERSION {AS OF | BETWEEN ...
        | FROM ... TO ... | CONTAINED IN (...) | ALL}."""
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)
self._prev.text.upper() 3451 start = self._parse_bitwise() 3452 self._match_texts(("TO", "AND")) 3453 end = self._parse_bitwise() 3454 expression: t.Optional[exp.Expression] = self.expression( 3455 exp.Tuple, expressions=[start, end] 3456 ) 3457 elif self._match_text_seq("CONTAINED", "IN"): 3458 kind = "CONTAINED IN" 3459 expression = self.expression( 3460 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3461 ) 3462 elif self._match(TokenType.ALL): 3463 kind = "ALL" 3464 expression = None 3465 else: 3466 self._match_text_seq("AS", "OF") 3467 kind = "AS OF" 3468 expression = self._parse_type() 3469 3470 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3471 3472 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3473 if not self._match(TokenType.UNNEST): 3474 return None 3475 3476 expressions = self._parse_wrapped_csv(self._parse_equality) 3477 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3478 3479 alias = self._parse_table_alias() if with_alias else None 3480 3481 if alias: 3482 if self.dialect.UNNEST_COLUMN_ONLY: 3483 if alias.args.get("columns"): 3484 self.raise_error("Unexpected extra column alias in unnest.") 3485 3486 alias.set("columns", [alias.this]) 3487 alias.set("this", None) 3488 3489 columns = alias.args.get("columns") or [] 3490 if offset and len(expressions) < len(columns): 3491 offset = columns.pop() 3492 3493 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3494 self._match(TokenType.ALIAS) 3495 offset = self._parse_id_var( 3496 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3497 ) or exp.to_identifier("offset") 3498 3499 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3500 3501 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3502 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3503 if not is_derived and not self._match_text_seq("VALUES"): 3504 return 
None 3505 3506 expressions = self._parse_csv(self._parse_value) 3507 alias = self._parse_table_alias() 3508 3509 if is_derived: 3510 self._match_r_paren() 3511 3512 return self.expression( 3513 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3514 ) 3515 3516 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3517 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3518 as_modifier and self._match_text_seq("USING", "SAMPLE") 3519 ): 3520 return None 3521 3522 bucket_numerator = None 3523 bucket_denominator = None 3524 bucket_field = None 3525 percent = None 3526 size = None 3527 seed = None 3528 3529 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3530 matched_l_paren = self._match(TokenType.L_PAREN) 3531 3532 if self.TABLESAMPLE_CSV: 3533 num = None 3534 expressions = self._parse_csv(self._parse_primary) 3535 else: 3536 expressions = None 3537 num = ( 3538 self._parse_factor() 3539 if self._match(TokenType.NUMBER, advance=False) 3540 else self._parse_primary() or self._parse_placeholder() 3541 ) 3542 3543 if self._match_text_seq("BUCKET"): 3544 bucket_numerator = self._parse_number() 3545 self._match_text_seq("OUT", "OF") 3546 bucket_denominator = bucket_denominator = self._parse_number() 3547 self._match(TokenType.ON) 3548 bucket_field = self._parse_field() 3549 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3550 percent = num 3551 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3552 size = num 3553 else: 3554 percent = num 3555 3556 if matched_l_paren: 3557 self._match_r_paren() 3558 3559 if self._match(TokenType.L_PAREN): 3560 method = self._parse_var(upper=True) 3561 seed = self._match(TokenType.COMMA) and self._parse_number() 3562 self._match_r_paren() 3563 elif self._match_texts(("SEED", "REPEATABLE")): 3564 seed = self._parse_wrapped(self._parse_number) 3565 3566 if not method and self.DEFAULT_SAMPLING_METHOD: 3567 method = 
exp.var(self.DEFAULT_SAMPLING_METHOD) 3568 3569 return self.expression( 3570 exp.TableSample, 3571 expressions=expressions, 3572 method=method, 3573 bucket_numerator=bucket_numerator, 3574 bucket_denominator=bucket_denominator, 3575 bucket_field=bucket_field, 3576 percent=percent, 3577 size=size, 3578 seed=seed, 3579 ) 3580 3581 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3582 return list(iter(self._parse_pivot, None)) or None 3583 3584 def _parse_joins(self) -> t.Iterator[exp.Join]: 3585 return iter(self._parse_join, None) 3586 3587 # https://duckdb.org/docs/sql/statements/pivot 3588 def _parse_simplified_pivot(self) -> exp.Pivot: 3589 def _parse_on() -> t.Optional[exp.Expression]: 3590 this = self._parse_bitwise() 3591 return self._parse_in(this) if self._match(TokenType.IN) else this 3592 3593 this = self._parse_table() 3594 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3595 using = self._match(TokenType.USING) and self._parse_csv( 3596 lambda: self._parse_alias(self._parse_function()) 3597 ) 3598 group = self._parse_group() 3599 return self.expression( 3600 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3601 ) 3602 3603 def _parse_pivot_in(self) -> exp.In: 3604 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3605 this = self._parse_assignment() 3606 3607 self._match(TokenType.ALIAS) 3608 alias = self._parse_field() 3609 if alias: 3610 return self.expression(exp.PivotAlias, this=this, alias=alias) 3611 3612 return this 3613 3614 value = self._parse_column() 3615 3616 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3617 self.raise_error("Expecting IN (") 3618 3619 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3620 3621 self._match_r_paren() 3622 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3623 3624 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3625 index = self._index 3626 include_nulls = None 3627 3628 if 
self._match(TokenType.PIVOT): 3629 unpivot = False 3630 elif self._match(TokenType.UNPIVOT): 3631 unpivot = True 3632 3633 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3634 if self._match_text_seq("INCLUDE", "NULLS"): 3635 include_nulls = True 3636 elif self._match_text_seq("EXCLUDE", "NULLS"): 3637 include_nulls = False 3638 else: 3639 return None 3640 3641 expressions = [] 3642 3643 if not self._match(TokenType.L_PAREN): 3644 self._retreat(index) 3645 return None 3646 3647 if unpivot: 3648 expressions = self._parse_csv(self._parse_column) 3649 else: 3650 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3651 3652 if not expressions: 3653 self.raise_error("Failed to parse PIVOT's aggregation list") 3654 3655 if not self._match(TokenType.FOR): 3656 self.raise_error("Expecting FOR") 3657 3658 field = self._parse_pivot_in() 3659 3660 self._match_r_paren() 3661 3662 pivot = self.expression( 3663 exp.Pivot, 3664 expressions=expressions, 3665 field=field, 3666 unpivot=unpivot, 3667 include_nulls=include_nulls, 3668 ) 3669 3670 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3671 pivot.set("alias", self._parse_table_alias()) 3672 3673 if not unpivot: 3674 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3675 3676 columns: t.List[exp.Expression] = [] 3677 for fld in pivot.args["field"].expressions: 3678 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3679 for name in names: 3680 if self.PREFIXED_PIVOT_COLUMNS: 3681 name = f"{name}_{field_name}" if name else field_name 3682 else: 3683 name = f"{field_name}_{name}" if name else field_name 3684 3685 columns.append(exp.to_identifier(name)) 3686 3687 pivot.set("columns", columns) 3688 3689 return pivot 3690 3691 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3692 return [agg.alias for agg in aggregations] 3693 3694 def 
_parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3695 if not skip_where_token and not self._match(TokenType.PREWHERE): 3696 return None 3697 3698 return self.expression( 3699 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 3700 ) 3701 3702 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3703 if not skip_where_token and not self._match(TokenType.WHERE): 3704 return None 3705 3706 return self.expression( 3707 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 3708 ) 3709 3710 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3711 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3712 return None 3713 3714 elements: t.Dict[str, t.Any] = defaultdict(list) 3715 3716 if self._match(TokenType.ALL): 3717 elements["all"] = True 3718 elif self._match(TokenType.DISTINCT): 3719 elements["all"] = False 3720 3721 while True: 3722 expressions = self._parse_csv( 3723 lambda: None 3724 if self._match(TokenType.ROLLUP, advance=False) 3725 else self._parse_assignment() 3726 ) 3727 if expressions: 3728 elements["expressions"].extend(expressions) 3729 3730 grouping_sets = self._parse_grouping_sets() 3731 if grouping_sets: 3732 elements["grouping_sets"].extend(grouping_sets) 3733 3734 rollup = None 3735 cube = None 3736 totals = None 3737 3738 index = self._index 3739 with_ = self._match(TokenType.WITH) 3740 if self._match(TokenType.ROLLUP): 3741 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3742 elements["rollup"].extend(ensure_list(rollup)) 3743 3744 if self._match(TokenType.CUBE): 3745 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3746 elements["cube"].extend(ensure_list(cube)) 3747 3748 if self._match_text_seq("TOTALS"): 3749 totals = True 3750 elements["totals"] = True # type: ignore 3751 3752 if not (grouping_sets or rollup or cube or totals): 3753 if with_: 3754 self._retreat(index) 3755 
    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse `GROUPING SETS (...)`; None if the keyword is absent."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: either a parenthesized tuple of columns or a column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_assignment())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_assignment())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        """Parse Oracle hierarchical `START WITH ... CONNECT BY` (either order).

        PRIOR is only a valid prefix operator inside CONNECT BY, so its parser is
        registered temporarily and removed again afterwards.
        """
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_assignment()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_assignment()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_assignment()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> exp.Alias:
        """Parse `<name> AS <expression>` (e.g. in MATCH_RECOGNIZE DEFINE)."""
        return self.expression(
            exp.Alias,
            alias=self._parse_id_var(any_token=True),
            this=self._match(TokenType.ALIAS) and self._parse_assignment(),
        )

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse ClickHouse `INTERPOLATE (...)` for ORDER BY ... WITH FILL."""
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse ORDER BY (or ORDER SIBLINGS BY), returning `this` unchanged if absent."""
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            interpolate=self._parse_interpolate(),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Generic parser for SORT BY / CLUSTER BY / DISTRIBUTE BY style clauses."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        """Parse one ORDER BY term with ASC/DESC, NULLS FIRST/LAST and WITH FILL.

        `desc` is tri-state: True for DESC, False for an explicit ASC, None when
        neither keyword appeared — `(asc and False)` intentionally maps a matched
        ASC to False rather than leaving None.
        """
        this = parse_method() if parse_method else self._parse_assignment()
        if not this:
            return None

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT (or TOP when `top`), including `LIMIT <offset>, <count>`,
        LIMIT BY, and the ANSI FETCH FIRST/NEXT form."""
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                # MySQL-style `LIMIT offset, count`
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this
    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse OFFSET <count> [ROW | ROWS] and attach it to `this`, if present."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))  # optional noise word

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        # Trailing "BY <expr>, ..." list (e.g. ClickHouse LIMIT ... BY); falsy if absent.
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse zero or more row-locking clauses (FOR UPDATE, FOR SHARE,
        LOCK IN SHARE MODE), each optionally followed by OF <tables> and a
        NOWAIT / WAIT <n> / SKIP LOCKED behavior."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait=True -> NOWAIT, wait=<expr> -> WAIT <n>, wait=False -> SKIP LOCKED
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Fold any trailing UNION / EXCEPT / INTERSECT operators into a
        left-deep SetOperation tree rooted at `this`."""
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation: t.Type[exp.SetOperation] = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments
            # A bare set operator (no ALL keyword) is treated as DISTINCT.
            distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL)
            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP:
            expression = this.expression

            if expression:
                # Hoist modifiers (SET_OP_MODIFIERS) off the right-most operand
                # onto the set operation node itself.
                for arg in self.SET_OP_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        # An expression is an assignment, optionally wrapped in an alias.
        return self._parse_alias(self._parse_assignment())

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        this = self._parse_disjunction()

        # Recursing on the right side makes assignment right-associative.
        while self._match_set(self.ASSIGNMENT):
            this = self.expression(
                self.ASSIGNMENT[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_assignment(),
            )

        return this

    # Precedence ladder: disjunction -> conjunction -> equality -> comparison -> range
    def _parse_disjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION)

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (BETWEEN, IN, LIKE, ISNULL/NOTNULL, IS ...)
        on top of `this`, honoring an optional leading NOT."""
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                # Range parser declined; keep the operand as-is.
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this =
            self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of an IS predicate (the IS token is already consumed).

        Handles IS [NOT] DISTINCT FROM (mapped to the null-safe comparisons) and
        IS [NOT] NULL/TRUE/FALSE. Retreats and returns None if neither follows.
        """
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            # IS NOT DISTINCT FROM == null-safe equality; IS DISTINCT FROM == null-safe inequality
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the right-hand side of an IN predicate: an UNNEST(...), a
        parenthesized/bracketed list or subquery, or a bare field."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                # A single query becomes IN (<subquery>) rather than an expression list.
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        # BETWEEN <low> AND <high> (the BETWEEN token is already consumed).
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Optional ESCAPE '<char>' suffix (e.g. after LIKE).
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        """Parse an INTERVAL expression; when `match_interval` is False the
        INTERVAL keyword is optional (used for chained interval addends).
        Returns None (after retreating) if no interval is found."""
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            # Bare IS after INTERVAL means this wasn't an interval at all — back out.
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.to_py())
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if len(parts) == 1:
                if unit:
                    # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var, this=parts[0][1].upper())

        if self.INTERVAL_SPANS and
self._match_text_seq("TO"):
            # INTERVAL ... <unit> TO <unit> span form (e.g. DAY TO SECOND)
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        interval = self.expression(exp.Interval, this=this, unit=unit)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise-level operators over terms, plus a few operators that
        live at this precedence: dialect-dependent `||` concat, `??` coalesce,
        and `<<` / `>>` shifts (tokenized as LT/GT pairs)."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level operators; marks Div nodes with the
        dialect's typed/safe division semantics."""
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                # A word-style DIV with no right operand wasn't an operator — back out.
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

        if isinstance(this, exp.Div):
            this.args["typed"] = self.dialect.TYPED_DIVISION
            this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an interval, a typed literal / cast (e.g. DATE '2020-01-01'),
        or fall back to a column (or identifier, when requested)."""
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                # <TYPE> '<literal>' — dialect hook first, otherwise a Cast.
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
4272 # 4273 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4274 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4275 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4276 # DECIMAL(38, 0)) in order to facilitate the data type's transpilation. 4277 # 4278 # In these cases, we don't really want to return the converted type, but instead retreat 4279 # and try to parse a Column or Identifier in the section below. 4280 if data_type.expressions and index2 - index > 1: 4281 self._retreat(index2) 4282 return self._parse_column_ops(data_type) 4283 4284 self._retreat(index) 4285 4286 if fallback_to_identifier: 4287 return self._parse_id_var() 4288 4289 this = self._parse_column() 4290 return this and self._parse_column_ops(this) 4291 4292 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4293 this = self._parse_type() 4294 if not this: 4295 return None 4296 4297 if isinstance(this, exp.Column) and not this.table: 4298 this = exp.var(this.name.upper()) 4299 4300 return self.expression( 4301 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4302 ) 4303 4304 def _parse_types( 4305 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4306 ) -> t.Optional[exp.Expression]: 4307 index = self._index 4308 4309 this: t.Optional[exp.Expression] = None 4310 prefix = self._match_text_seq("SYSUDTLIB", ".") 4311 4312 if not self._match_set(self.TYPE_TOKENS): 4313 identifier = allow_identifiers and self._parse_id_var( 4314 any_token=False, tokens=(TokenType.VAR,) 4315 ) 4316 if isinstance(identifier, exp.Identifier): 4317 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4318 4319 if len(tokens) != 1: 4320 self.raise_error("Unexpected identifier", self._prev) 4321 4322 if tokens[0].token_type in self.TYPE_TOKENS: 4323 self._prev = tokens[0] 4324 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 
4325 type_name = identifier.name 4326 4327 while self._match(TokenType.DOT): 4328 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4329 4330 this = exp.DataType.build(type_name, udt=True) 4331 else: 4332 self._retreat(self._index - 1) 4333 return None 4334 else: 4335 return None 4336 4337 type_token = self._prev.token_type 4338 4339 if type_token == TokenType.PSEUDO_TYPE: 4340 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4341 4342 if type_token == TokenType.OBJECT_IDENTIFIER: 4343 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4344 4345 # https://materialize.com/docs/sql/types/map/ 4346 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4347 key_type = self._parse_types( 4348 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4349 ) 4350 if not self._match(TokenType.FARROW): 4351 self._retreat(index) 4352 return None 4353 4354 value_type = self._parse_types( 4355 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4356 ) 4357 if not self._match(TokenType.R_BRACKET): 4358 self._retreat(index) 4359 return None 4360 4361 return exp.DataType( 4362 this=exp.DataType.Type.MAP, 4363 expressions=[key_type, value_type], 4364 nested=True, 4365 prefix=prefix, 4366 ) 4367 4368 nested = type_token in self.NESTED_TYPE_TOKENS 4369 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4370 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4371 expressions = None 4372 maybe_func = False 4373 4374 if self._match(TokenType.L_PAREN): 4375 if is_struct: 4376 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4377 elif nested: 4378 expressions = self._parse_csv( 4379 lambda: self._parse_types( 4380 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4381 ) 4382 ) 4383 elif type_token in self.ENUM_TYPE_TOKENS: 4384 expressions = self._parse_csv(self._parse_equality) 4385 elif is_aggregate: 4386 
func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4387 any_token=False, tokens=(TokenType.VAR,) 4388 ) 4389 if not func_or_ident or not self._match(TokenType.COMMA): 4390 return None 4391 expressions = self._parse_csv( 4392 lambda: self._parse_types( 4393 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4394 ) 4395 ) 4396 expressions.insert(0, func_or_ident) 4397 else: 4398 expressions = self._parse_csv(self._parse_type_size) 4399 4400 # https://docs.snowflake.com/en/sql-reference/data-types-vector 4401 if type_token == TokenType.VECTOR and len(expressions) == 2: 4402 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 4403 4404 if not expressions or not self._match(TokenType.R_PAREN): 4405 self._retreat(index) 4406 return None 4407 4408 maybe_func = True 4409 4410 values: t.Optional[t.List[exp.Expression]] = None 4411 4412 if nested and self._match(TokenType.LT): 4413 if is_struct: 4414 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4415 else: 4416 expressions = self._parse_csv( 4417 lambda: self._parse_types( 4418 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4419 ) 4420 ) 4421 4422 if not self._match(TokenType.GT): 4423 self.raise_error("Expecting >") 4424 4425 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4426 values = self._parse_csv(self._parse_assignment) 4427 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4428 4429 if type_token in self.TIMESTAMPS: 4430 if self._match_text_seq("WITH", "TIME", "ZONE"): 4431 maybe_func = False 4432 tz_type = ( 4433 exp.DataType.Type.TIMETZ 4434 if type_token in self.TIMES 4435 else exp.DataType.Type.TIMESTAMPTZ 4436 ) 4437 this = exp.DataType(this=tz_type, expressions=expressions) 4438 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4439 maybe_func = False 4440 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4441 
elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4442 maybe_func = False 4443 elif type_token == TokenType.INTERVAL: 4444 unit = self._parse_var(upper=True) 4445 if unit: 4446 if self._match_text_seq("TO"): 4447 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4448 4449 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4450 else: 4451 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4452 4453 if maybe_func and check_func: 4454 index2 = self._index 4455 peek = self._parse_string() 4456 4457 if not peek: 4458 self._retreat(index) 4459 return None 4460 4461 self._retreat(index2) 4462 4463 if not this: 4464 if self._match_text_seq("UNSIGNED"): 4465 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4466 if not unsigned_type_token: 4467 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4468 4469 type_token = unsigned_type_token or type_token 4470 4471 this = exp.DataType( 4472 this=exp.DataType.Type[type_token.value], 4473 expressions=expressions, 4474 nested=nested, 4475 values=values, 4476 prefix=prefix, 4477 ) 4478 elif expressions: 4479 this.set("expressions", expressions) 4480 4481 # https://materialize.com/docs/sql/types/list/#type-name 4482 while self._match(TokenType.LIST): 4483 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4484 4485 index = self._index 4486 4487 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4488 matched_array = self._match(TokenType.ARRAY) 4489 4490 while self._curr: 4491 matched_l_bracket = self._match(TokenType.L_BRACKET) 4492 if not matched_l_bracket and not matched_array: 4493 break 4494 4495 matched_array = False 4496 values = self._parse_csv(self._parse_assignment) or None 4497 if values and not schema: 4498 self._retreat(index) 4499 break 4500 4501 this = exp.DataType( 4502 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4503 ) 
4504 self._match(TokenType.R_BRACKET) 4505 4506 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 4507 converter = self.TYPE_CONVERTERS.get(this.this) 4508 if converter: 4509 this = converter(t.cast(exp.DataType, this)) 4510 4511 return this 4512 4513 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4514 index = self._index 4515 4516 if ( 4517 self._curr 4518 and self._next 4519 and self._curr.token_type in self.TYPE_TOKENS 4520 and self._next.token_type in self.TYPE_TOKENS 4521 ): 4522 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 4523 # type token. Without this, the list will be parsed as a type and we'll eventually crash 4524 this = self._parse_id_var() 4525 else: 4526 this = ( 4527 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4528 or self._parse_id_var() 4529 ) 4530 4531 self._match(TokenType.COLON) 4532 4533 if ( 4534 type_required 4535 and not isinstance(this, exp.DataType) 4536 and not self._match_set(self.TYPE_TOKENS, advance=False) 4537 ): 4538 self._retreat(index) 4539 return self._parse_types() 4540 4541 return self._parse_column_def(this) 4542 4543 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4544 if not self._match_text_seq("AT", "TIME", "ZONE"): 4545 return this 4546 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4547 4548 def _parse_column(self) -> t.Optional[exp.Expression]: 4549 this = self._parse_column_reference() 4550 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 4551 4552 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 4553 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 4554 4555 return column 4556 4557 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4558 this = self._parse_field() 4559 if ( 4560 not this 4561 and self._match(TokenType.VALUES, advance=False) 4562 and 
self.VALUES_FOLLOWED_BY_PAREN 4563 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4564 ): 4565 this = self._parse_id_var() 4566 4567 if isinstance(this, exp.Identifier): 4568 # We bubble up comments from the Identifier to the Column 4569 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4570 4571 return this 4572 4573 def _parse_colon_as_variant_extract( 4574 self, this: t.Optional[exp.Expression] 4575 ) -> t.Optional[exp.Expression]: 4576 casts = [] 4577 json_path = [] 4578 4579 while self._match(TokenType.COLON): 4580 start_index = self._index 4581 4582 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 4583 path = self._parse_column_ops( 4584 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 4585 ) 4586 4587 # The cast :: operator has a lower precedence than the extraction operator :, so 4588 # we rearrange the AST appropriately to avoid casting the JSON path 4589 while isinstance(path, exp.Cast): 4590 casts.append(path.to) 4591 path = path.this 4592 4593 if casts: 4594 dcolon_offset = next( 4595 i 4596 for i, t in enumerate(self._tokens[start_index:]) 4597 if t.token_type == TokenType.DCOLON 4598 ) 4599 end_token = self._tokens[start_index + dcolon_offset - 1] 4600 else: 4601 end_token = self._prev 4602 4603 if path: 4604 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 4605 4606 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 4607 # Databricks transforms it back to the colon/dot notation 4608 if json_path: 4609 this = self.expression( 4610 exp.JSONExtract, 4611 this=this, 4612 expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))), 4613 variant_extract=True, 4614 ) 4615 4616 while casts: 4617 this = self.expression(exp.Cast, this=this, to=casts.pop()) 4618 4619 return this 4620 4621 def _parse_dcolon(self) -> 
t.Optional[exp.Expression]: 4622 return self._parse_types() 4623 4624 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4625 this = self._parse_bracket(this) 4626 4627 while self._match_set(self.COLUMN_OPERATORS): 4628 op_token = self._prev.token_type 4629 op = self.COLUMN_OPERATORS.get(op_token) 4630 4631 if op_token == TokenType.DCOLON: 4632 field = self._parse_dcolon() 4633 if not field: 4634 self.raise_error("Expected type") 4635 elif op and self._curr: 4636 field = self._parse_column_reference() 4637 else: 4638 field = self._parse_field(any_token=True, anonymous_func=True) 4639 4640 if isinstance(field, exp.Func) and this: 4641 # bigquery allows function calls like x.y.count(...) 4642 # SAFE.SUBSTR(...) 4643 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4644 this = exp.replace_tree( 4645 this, 4646 lambda n: ( 4647 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4648 if n.table 4649 else n.this 4650 ) 4651 if isinstance(n, exp.Column) 4652 else n, 4653 ) 4654 4655 if op: 4656 this = op(self, this, field) 4657 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4658 this = self.expression( 4659 exp.Column, 4660 this=field, 4661 table=this.this, 4662 db=this.args.get("table"), 4663 catalog=this.args.get("db"), 4664 ) 4665 else: 4666 this = self.expression(exp.Dot, this=this, expression=field) 4667 4668 this = self._parse_bracket(this) 4669 4670 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 4671 4672 def _parse_primary(self) -> t.Optional[exp.Expression]: 4673 if self._match_set(self.PRIMARY_PARSERS): 4674 token_type = self._prev.token_type 4675 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4676 4677 if token_type == TokenType.STRING: 4678 expressions = [primary] 4679 while self._match(TokenType.STRING): 4680 expressions.append(exp.Literal.string(self._prev.text)) 
4681 4682 if len(expressions) > 1: 4683 return self.expression(exp.Concat, expressions=expressions) 4684 4685 return primary 4686 4687 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4688 return exp.Literal.number(f"0.{self._prev.text}") 4689 4690 if self._match(TokenType.L_PAREN): 4691 comments = self._prev_comments 4692 query = self._parse_select() 4693 4694 if query: 4695 expressions = [query] 4696 else: 4697 expressions = self._parse_expressions() 4698 4699 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4700 4701 if not this and self._match(TokenType.R_PAREN, advance=False): 4702 this = self.expression(exp.Tuple) 4703 elif isinstance(this, exp.UNWRAPPED_QUERIES): 4704 this = self._parse_subquery(this=this, parse_alias=False) 4705 elif isinstance(this, exp.Subquery): 4706 this = self._parse_subquery( 4707 this=self._parse_set_operations(this), parse_alias=False 4708 ) 4709 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 4710 this = self.expression(exp.Tuple, expressions=expressions) 4711 else: 4712 this = self.expression(exp.Paren, this=this) 4713 4714 if this: 4715 this.add_comments(comments) 4716 4717 self._match_r_paren(expression=this) 4718 return this 4719 4720 return None 4721 4722 def _parse_field( 4723 self, 4724 any_token: bool = False, 4725 tokens: t.Optional[t.Collection[TokenType]] = None, 4726 anonymous_func: bool = False, 4727 ) -> t.Optional[exp.Expression]: 4728 if anonymous_func: 4729 field = ( 4730 self._parse_function(anonymous=anonymous_func, any_token=any_token) 4731 or self._parse_primary() 4732 ) 4733 else: 4734 field = self._parse_primary() or self._parse_function( 4735 anonymous=anonymous_func, any_token=any_token 4736 ) 4737 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 4738 4739 def _parse_function( 4740 self, 4741 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4742 anonymous: bool = False, 4743 optional_parens: bool = True, 4744 any_token: bool = False, 4745 ) 
-> t.Optional[exp.Expression]: 4746 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4747 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4748 fn_syntax = False 4749 if ( 4750 self._match(TokenType.L_BRACE, advance=False) 4751 and self._next 4752 and self._next.text.upper() == "FN" 4753 ): 4754 self._advance(2) 4755 fn_syntax = True 4756 4757 func = self._parse_function_call( 4758 functions=functions, 4759 anonymous=anonymous, 4760 optional_parens=optional_parens, 4761 any_token=any_token, 4762 ) 4763 4764 if fn_syntax: 4765 self._match(TokenType.R_BRACE) 4766 4767 return func 4768 4769 def _parse_function_call( 4770 self, 4771 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4772 anonymous: bool = False, 4773 optional_parens: bool = True, 4774 any_token: bool = False, 4775 ) -> t.Optional[exp.Expression]: 4776 if not self._curr: 4777 return None 4778 4779 comments = self._curr.comments 4780 token_type = self._curr.token_type 4781 this = self._curr.text 4782 upper = this.upper() 4783 4784 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4785 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4786 self._advance() 4787 return self._parse_window(parser(self)) 4788 4789 if not self._next or self._next.token_type != TokenType.L_PAREN: 4790 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4791 self._advance() 4792 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4793 4794 return None 4795 4796 if any_token: 4797 if token_type in self.RESERVED_TOKENS: 4798 return None 4799 elif token_type not in self.FUNC_TOKENS: 4800 return None 4801 4802 self._advance(2) 4803 4804 parser = self.FUNCTION_PARSERS.get(upper) 4805 if parser and not anonymous: 4806 this = parser(self) 4807 else: 4808 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4809 4810 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4811 this = 
self.expression(subquery_predicate, this=self._parse_select()) 4812 self._match_r_paren() 4813 return this 4814 4815 if functions is None: 4816 functions = self.FUNCTIONS 4817 4818 function = functions.get(upper) 4819 4820 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4821 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4822 4823 if alias: 4824 args = self._kv_to_prop_eq(args) 4825 4826 if function and not anonymous: 4827 if "dialect" in function.__code__.co_varnames: 4828 func = function(args, dialect=self.dialect) 4829 else: 4830 func = function(args) 4831 4832 func = self.validate_expression(func, args) 4833 if not self.dialect.NORMALIZE_FUNCTIONS: 4834 func.meta["name"] = this 4835 4836 this = func 4837 else: 4838 if token_type == TokenType.IDENTIFIER: 4839 this = exp.Identifier(this=this, quoted=True) 4840 this = self.expression(exp.Anonymous, this=this, expressions=args) 4841 4842 if isinstance(this, exp.Expression): 4843 this.add_comments(comments) 4844 4845 self._match_r_paren(this) 4846 return self._parse_window(this) 4847 4848 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4849 transformed = [] 4850 4851 for e in expressions: 4852 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4853 if isinstance(e, exp.Alias): 4854 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4855 4856 if not isinstance(e, exp.PropertyEQ): 4857 e = self.expression( 4858 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 4859 ) 4860 4861 if isinstance(e.this, exp.Column): 4862 e.this.replace(e.this.this) 4863 4864 transformed.append(e) 4865 4866 return transformed 4867 4868 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4869 return self._parse_column_def(self._parse_id_var()) 4870 4871 def _parse_user_defined_function( 4872 self, kind: t.Optional[TokenType] = None 4873 ) -> t.Optional[exp.Expression]: 4874 this = self._parse_id_var() 4875 4876 while 
self._match(TokenType.DOT): 4877 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4878 4879 if not self._match(TokenType.L_PAREN): 4880 return this 4881 4882 expressions = self._parse_csv(self._parse_function_parameter) 4883 self._match_r_paren() 4884 return self.expression( 4885 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4886 ) 4887 4888 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4889 literal = self._parse_primary() 4890 if literal: 4891 return self.expression(exp.Introducer, this=token.text, expression=literal) 4892 4893 return self.expression(exp.Identifier, this=token.text) 4894 4895 def _parse_session_parameter(self) -> exp.SessionParameter: 4896 kind = None 4897 this = self._parse_id_var() or self._parse_primary() 4898 4899 if this and self._match(TokenType.DOT): 4900 kind = this.name 4901 this = self._parse_var() or self._parse_primary() 4902 4903 return self.expression(exp.SessionParameter, this=this, kind=kind) 4904 4905 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 4906 return self._parse_id_var() 4907 4908 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4909 index = self._index 4910 4911 if self._match(TokenType.L_PAREN): 4912 expressions = t.cast( 4913 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 4914 ) 4915 4916 if not self._match(TokenType.R_PAREN): 4917 self._retreat(index) 4918 else: 4919 expressions = [self._parse_lambda_arg()] 4920 4921 if self._match_set(self.LAMBDAS): 4922 return self.LAMBDAS[self._prev.token_type](self, expressions) 4923 4924 self._retreat(index) 4925 4926 this: t.Optional[exp.Expression] 4927 4928 if self._match(TokenType.DISTINCT): 4929 this = self.expression( 4930 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 4931 ) 4932 else: 4933 this = self._parse_select_or_expression(alias=alias) 4934 4935 return self._parse_limit( 4936 
self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4937 ) 4938 4939 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4940 index = self._index 4941 if not self._match(TokenType.L_PAREN): 4942 return this 4943 4944 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 4945 # expr can be of both types 4946 if self._match_set(self.SELECT_START_TOKENS): 4947 self._retreat(index) 4948 return this 4949 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4950 self._match_r_paren() 4951 return self.expression(exp.Schema, this=this, expressions=args) 4952 4953 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4954 return self._parse_column_def(self._parse_field(any_token=True)) 4955 4956 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4957 # column defs are not really columns, they're identifiers 4958 if isinstance(this, exp.Column): 4959 this = this.this 4960 4961 kind = self._parse_types(schema=True) 4962 4963 if self._match_text_seq("FOR", "ORDINALITY"): 4964 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4965 4966 constraints: t.List[exp.Expression] = [] 4967 4968 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 4969 ("ALIAS", "MATERIALIZED") 4970 ): 4971 persisted = self._prev.text.upper() == "MATERIALIZED" 4972 constraints.append( 4973 self.expression( 4974 exp.ComputedColumnConstraint, 4975 this=self._parse_assignment(), 4976 persisted=persisted or self._match_text_seq("PERSISTED"), 4977 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4978 ) 4979 ) 4980 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4981 self._match(TokenType.ALIAS) 4982 constraints.append( 4983 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4984 ) 4985 4986 while True: 4987 constraint = 
self._parse_column_constraint() 4988 if not constraint: 4989 break 4990 constraints.append(constraint) 4991 4992 if not kind and not constraints: 4993 return this 4994 4995 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4996 4997 def _parse_auto_increment( 4998 self, 4999 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5000 start = None 5001 increment = None 5002 5003 if self._match(TokenType.L_PAREN, advance=False): 5004 args = self._parse_wrapped_csv(self._parse_bitwise) 5005 start = seq_get(args, 0) 5006 increment = seq_get(args, 1) 5007 elif self._match_text_seq("START"): 5008 start = self._parse_bitwise() 5009 self._match_text_seq("INCREMENT") 5010 increment = self._parse_bitwise() 5011 5012 if start and increment: 5013 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 5014 5015 return exp.AutoIncrementColumnConstraint() 5016 5017 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5018 if not self._match_text_seq("REFRESH"): 5019 self._retreat(self._index - 1) 5020 return None 5021 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5022 5023 def _parse_compress(self) -> exp.CompressColumnConstraint: 5024 if self._match(TokenType.L_PAREN, advance=False): 5025 return self.expression( 5026 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5027 ) 5028 5029 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5030 5031 def _parse_generated_as_identity( 5032 self, 5033 ) -> ( 5034 exp.GeneratedAsIdentityColumnConstraint 5035 | exp.ComputedColumnConstraint 5036 | exp.GeneratedAsRowColumnConstraint 5037 ): 5038 if self._match_text_seq("BY", "DEFAULT"): 5039 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5040 this = self.expression( 5041 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5042 ) 5043 else: 5044 
self._match_text_seq("ALWAYS") 5045 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5046 5047 self._match(TokenType.ALIAS) 5048 5049 if self._match_text_seq("ROW"): 5050 start = self._match_text_seq("START") 5051 if not start: 5052 self._match(TokenType.END) 5053 hidden = self._match_text_seq("HIDDEN") 5054 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5055 5056 identity = self._match_text_seq("IDENTITY") 5057 5058 if self._match(TokenType.L_PAREN): 5059 if self._match(TokenType.START_WITH): 5060 this.set("start", self._parse_bitwise()) 5061 if self._match_text_seq("INCREMENT", "BY"): 5062 this.set("increment", self._parse_bitwise()) 5063 if self._match_text_seq("MINVALUE"): 5064 this.set("minvalue", self._parse_bitwise()) 5065 if self._match_text_seq("MAXVALUE"): 5066 this.set("maxvalue", self._parse_bitwise()) 5067 5068 if self._match_text_seq("CYCLE"): 5069 this.set("cycle", True) 5070 elif self._match_text_seq("NO", "CYCLE"): 5071 this.set("cycle", False) 5072 5073 if not identity: 5074 this.set("expression", self._parse_range()) 5075 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5076 args = self._parse_csv(self._parse_bitwise) 5077 this.set("start", seq_get(args, 0)) 5078 this.set("increment", seq_get(args, 1)) 5079 5080 self._match_r_paren() 5081 5082 return this 5083 5084 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5085 self._match_text_seq("LENGTH") 5086 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5087 5088 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5089 if self._match_text_seq("NULL"): 5090 return self.expression(exp.NotNullColumnConstraint) 5091 if self._match_text_seq("CASESPECIFIC"): 5092 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5093 if self._match_text_seq("FOR", "REPLICATION"): 5094 return self.expression(exp.NotForReplicationColumnConstraint) 
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        # Parses a single column-level constraint, optionally named via a
        # leading CONSTRAINT <name> clause.
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            # Dispatch to the parser registered for the constraint keyword
            # that was just consumed (available in self._prev).
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        # Without a leading CONSTRAINT keyword, fall back to the unnamed
        # schema-level constraint grammar.
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        # Collects consecutive unnamed constraints until none can be parsed.
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        # A quoted identifier here cannot start a constraint keyword; bail out
        # without consuming anything (advance=False peeks only).
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        # UNIQUE [KEY] [(<columns>)] [USING <index_type>] [ON CONFLICT ...]
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
        )

    def
_parse_key_constraint_options(self) -> t.List[str]: 5156 options = [] 5157 while True: 5158 if not self._curr: 5159 break 5160 5161 if self._match(TokenType.ON): 5162 action = None 5163 on = self._advance_any() and self._prev.text 5164 5165 if self._match_text_seq("NO", "ACTION"): 5166 action = "NO ACTION" 5167 elif self._match_text_seq("CASCADE"): 5168 action = "CASCADE" 5169 elif self._match_text_seq("RESTRICT"): 5170 action = "RESTRICT" 5171 elif self._match_pair(TokenType.SET, TokenType.NULL): 5172 action = "SET NULL" 5173 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5174 action = "SET DEFAULT" 5175 else: 5176 self.raise_error("Invalid key constraint") 5177 5178 options.append(f"ON {on} {action}") 5179 elif self._match_text_seq("NOT", "ENFORCED"): 5180 options.append("NOT ENFORCED") 5181 elif self._match_text_seq("DEFERRABLE"): 5182 options.append("DEFERRABLE") 5183 elif self._match_text_seq("INITIALLY", "DEFERRED"): 5184 options.append("INITIALLY DEFERRED") 5185 elif self._match_text_seq("NORELY"): 5186 options.append("NORELY") 5187 elif self._match_text_seq("MATCH", "FULL"): 5188 options.append("MATCH FULL") 5189 else: 5190 break 5191 5192 return options 5193 5194 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5195 if match and not self._match(TokenType.REFERENCES): 5196 return None 5197 5198 expressions = None 5199 this = self._parse_table(schema=True) 5200 options = self._parse_key_constraint_options() 5201 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5202 5203 def _parse_foreign_key(self) -> exp.ForeignKey: 5204 expressions = self._parse_wrapped_id_vars() 5205 reference = self._parse_references() 5206 options = {} 5207 5208 while self._match(TokenType.ON): 5209 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5210 self.raise_error("Expected DELETE or UPDATE") 5211 5212 kind = self._prev.text.lower() 5213 5214 if self._match_text_seq("NO", "ACTION"): 5215 
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                # SET NULL / SET DEFAULT
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Otherwise take the next single token as the action verbatim
                # (e.g. CASCADE, RESTRICT).
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            **options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        # PERIOD FOR SYSTEM_TIME (<start>, <end>); if the expected token does
        # not follow, un-consume the previously matched token and give up.
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        # Optional ASC/DESC directly after PRIMARY KEY.
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        # Without a parenthesized column list this is a column-level
        # constraint rather than a table-level key definition.
        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this
5270 5271 bracket_kind = self._prev.token_type 5272 expressions = self._parse_csv( 5273 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5274 ) 5275 5276 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5277 self.raise_error("Expected ]") 5278 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5279 self.raise_error("Expected }") 5280 5281 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5282 if bracket_kind == TokenType.L_BRACE: 5283 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5284 elif not this: 5285 this = self.expression(exp.Array, expressions=expressions) 5286 else: 5287 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5288 if constructor_type: 5289 return self.expression(constructor_type, expressions=expressions) 5290 5291 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5292 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5293 5294 self._add_comments(this) 5295 return self._parse_bracket(this) 5296 5297 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5298 if self._match(TokenType.COLON): 5299 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5300 return this 5301 5302 def _parse_case(self) -> t.Optional[exp.Expression]: 5303 ifs = [] 5304 default = None 5305 5306 comments = self._prev_comments 5307 expression = self._parse_assignment() 5308 5309 while self._match(TokenType.WHEN): 5310 this = self._parse_assignment() 5311 self._match(TokenType.THEN) 5312 then = self._parse_assignment() 5313 ifs.append(self.expression(exp.If, this=this, true=then)) 5314 5315 if self._match(TokenType.ELSE): 5316 default = self._parse_assignment() 5317 5318 if not self._match(TokenType.END): 5319 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5320 default = 
exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        # Handles both function-style IF(cond, true[, false]) and the
        # statement-style IF ... THEN ... [ELSE ...] END form.
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_assignment)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                # Some dialects treat a statement-initial bare IF as a command.
                return self._parse_as_command(self._prev)

            condition = self._parse_assignment()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_assignment()
            false = self._parse_assignment() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        # NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)]; NEXT was already
        # consumed by the caller, so retreat if VALUE FOR doesn't follow.
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract | exp.Anonymous:
        # EXTRACT(<unit> FROM <expr>); some dialects separate the two
        # operands with a comma instead of FROM.
        this = self._parse_function() or self._parse_var_or_string(upper=True)

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_gap_fill(self) -> exp.GapFill:
        self._match(TokenType.TABLE)
        this = self._parse_table()

self._match(TokenType.COMMA) 5380 args = [this, *self._parse_csv(self._parse_lambda)] 5381 5382 gap_fill = exp.GapFill.from_arg_list(args) 5383 return self.validate_expression(gap_fill, args) 5384 5385 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5386 this = self._parse_assignment() 5387 5388 if not self._match(TokenType.ALIAS): 5389 if self._match(TokenType.COMMA): 5390 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5391 5392 self.raise_error("Expected AS after CAST") 5393 5394 fmt = None 5395 to = self._parse_types() 5396 5397 if self._match(TokenType.FORMAT): 5398 fmt_string = self._parse_string() 5399 fmt = self._parse_at_time_zone(fmt_string) 5400 5401 if not to: 5402 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5403 if to.this in exp.DataType.TEMPORAL_TYPES: 5404 this = self.expression( 5405 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5406 this=this, 5407 format=exp.Literal.string( 5408 format_time( 5409 fmt_string.this if fmt_string else "", 5410 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5411 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5412 ) 5413 ), 5414 safe=safe, 5415 ) 5416 5417 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5418 this.set("zone", fmt.args["zone"]) 5419 return this 5420 elif not to: 5421 self.raise_error("Expected TYPE after CAST") 5422 elif isinstance(to, exp.Identifier): 5423 to = exp.DataType.build(to.name, udt=True) 5424 elif to.this == exp.DataType.Type.CHAR: 5425 if self._match(TokenType.CHARACTER_SET): 5426 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5427 5428 return self.expression( 5429 exp.Cast if strict else exp.TryCast, 5430 this=this, 5431 to=to, 5432 format=fmt, 5433 safe=safe, 5434 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5435 ) 5436 5437 def _parse_string_agg(self) -> exp.Expression: 5438 if 
self._match(TokenType.DISTINCT):
            # DISTINCT applies only to the first (aggregated) argument.
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_assignment()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_assignment))
        else:
            args = self._parse_csv(self._parse_assignment)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            # No WITHIN GROUP clause: restore the cursor and build a plain
            # GROUP_CONCAT-style call from the collected arguments.
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        # CONVERT(<expr> USING <charset>) or CONVERT(<expr>, <type>); with
        # neither form, the target type is left unset.
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
5491 """ 5492 args = self._parse_csv(self._parse_assignment) 5493 5494 if len(args) < 3: 5495 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5496 5497 expression, *expressions = args 5498 if not expression: 5499 return None 5500 5501 ifs = [] 5502 for search, result in zip(expressions[::2], expressions[1::2]): 5503 if not search or not result: 5504 return None 5505 5506 if isinstance(search, exp.Literal): 5507 ifs.append( 5508 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5509 ) 5510 elif isinstance(search, exp.Null): 5511 ifs.append( 5512 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5513 ) 5514 else: 5515 cond = exp.or_( 5516 exp.EQ(this=expression.copy(), expression=search), 5517 exp.and_( 5518 exp.Is(this=expression.copy(), expression=exp.Null()), 5519 exp.Is(this=search.copy(), expression=exp.Null()), 5520 copy=False, 5521 ), 5522 copy=False, 5523 ) 5524 ifs.append(exp.If(this=cond, true=result)) 5525 5526 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5527 5528 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5529 self._match_text_seq("KEY") 5530 key = self._parse_column() 5531 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5532 self._match_text_seq("VALUE") 5533 value = self._parse_bitwise() 5534 5535 if not key and not value: 5536 return None 5537 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5538 5539 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5540 if not this or not self._match_text_seq("FORMAT", "JSON"): 5541 return this 5542 5543 return self.expression(exp.FormatJson, this=this) 5544 5545 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5546 # Parses the "X ON Y" syntax, i.e. 
NULL ON NULL (Oracle, T-SQL) 5547 for value in values: 5548 if self._match_text_seq(value, "ON", on): 5549 return f"{value} ON {on}" 5550 5551 return None 5552 5553 @t.overload 5554 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5555 5556 @t.overload 5557 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 5558 5559 def _parse_json_object(self, agg=False): 5560 star = self._parse_star() 5561 expressions = ( 5562 [star] 5563 if star 5564 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5565 ) 5566 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5567 5568 unique_keys = None 5569 if self._match_text_seq("WITH", "UNIQUE"): 5570 unique_keys = True 5571 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5572 unique_keys = False 5573 5574 self._match_text_seq("KEYS") 5575 5576 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5577 self._parse_type() 5578 ) 5579 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5580 5581 return self.expression( 5582 exp.JSONObjectAgg if agg else exp.JSONObject, 5583 expressions=expressions, 5584 null_handling=null_handling, 5585 unique_keys=unique_keys, 5586 return_type=return_type, 5587 encoding=encoding, 5588 ) 5589 5590 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5591 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5592 if not self._match_text_seq("NESTED"): 5593 this = self._parse_id_var() 5594 kind = self._parse_types(allow_identifiers=False) 5595 nested = None 5596 else: 5597 this = None 5598 kind = None 5599 nested = True 5600 5601 path = self._match_text_seq("PATH") and self._parse_string() 5602 nested_schema = nested and self._parse_json_schema() 5603 5604 return self.expression( 5605 exp.JSONColumnDef, 5606 this=this, 5607 kind=kind, 5608 path=path, 5609 nested_schema=nested_schema, 5610 ) 5611 5612 def _parse_json_schema(self) -> exp.JSONSchema: 
5613 self._match_text_seq("COLUMNS") 5614 return self.expression( 5615 exp.JSONSchema, 5616 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5617 ) 5618 5619 def _parse_json_table(self) -> exp.JSONTable: 5620 this = self._parse_format_json(self._parse_bitwise()) 5621 path = self._match(TokenType.COMMA) and self._parse_string() 5622 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5623 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5624 schema = self._parse_json_schema() 5625 5626 return exp.JSONTable( 5627 this=this, 5628 schema=schema, 5629 path=path, 5630 error_handling=error_handling, 5631 empty_handling=empty_handling, 5632 ) 5633 5634 def _parse_match_against(self) -> exp.MatchAgainst: 5635 expressions = self._parse_csv(self._parse_column) 5636 5637 self._match_text_seq(")", "AGAINST", "(") 5638 5639 this = self._parse_string() 5640 5641 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5642 modifier = "IN NATURAL LANGUAGE MODE" 5643 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5644 modifier = f"{modifier} WITH QUERY EXPANSION" 5645 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5646 modifier = "IN BOOLEAN MODE" 5647 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5648 modifier = "WITH QUERY EXPANSION" 5649 else: 5650 modifier = None 5651 5652 return self.expression( 5653 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5654 ) 5655 5656 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5657 def _parse_open_json(self) -> exp.OpenJSON: 5658 this = self._parse_bitwise() 5659 path = self._match(TokenType.COMMA) and self._parse_string() 5660 5661 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5662 this = self._parse_field(any_token=True) 5663 kind = self._parse_types() 5664 path = self._parse_string() 5665 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5666 5667 
return self.expression( 5668 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5669 ) 5670 5671 expressions = None 5672 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5673 self._match_l_paren() 5674 expressions = self._parse_csv(_parse_open_json_column_def) 5675 5676 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5677 5678 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5679 args = self._parse_csv(self._parse_bitwise) 5680 5681 if self._match(TokenType.IN): 5682 return self.expression( 5683 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5684 ) 5685 5686 if haystack_first: 5687 haystack = seq_get(args, 0) 5688 needle = seq_get(args, 1) 5689 else: 5690 needle = seq_get(args, 0) 5691 haystack = seq_get(args, 1) 5692 5693 return self.expression( 5694 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5695 ) 5696 5697 def _parse_predict(self) -> exp.Predict: 5698 self._match_text_seq("MODEL") 5699 this = self._parse_table() 5700 5701 self._match(TokenType.COMMA) 5702 self._match_text_seq("TABLE") 5703 5704 return self.expression( 5705 exp.Predict, 5706 this=this, 5707 expression=self._parse_table(), 5708 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5709 ) 5710 5711 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5712 args = self._parse_csv(self._parse_table) 5713 return exp.JoinHint(this=func_name.upper(), expressions=args) 5714 5715 def _parse_substring(self) -> exp.Substring: 5716 # Postgres supports the form: substring(string [from int] [for int]) 5717 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5718 5719 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5720 5721 if self._match(TokenType.FROM): 5722 args.append(self._parse_bitwise()) 5723 if self._match(TokenType.FOR): 5724 if len(args) == 1: 5725 
                args.append(exp.Literal.number(1))
            args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # TRIM([{BOTH | LEADING | TRAILING}] [<chars> FROM] <expr> [COLLATE ...])
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # After FROM (or in TRIM_PATTERN_FIRST dialects) the operand
            # parsed first is the pattern, not the target string, so swap.
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        # WINDOW <name> AS (<spec>) [, ...]
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        # Wraps `this` in IGNORE NULLS / RESPECT NULLS when present.
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # HAVING {MAX | MIN} <column>; anything other than MIN is treated as MAX.
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )
5778 5779 return this 5780 5781 def _parse_window( 5782 self, this: t.Optional[exp.Expression], alias: bool = False 5783 ) -> t.Optional[exp.Expression]: 5784 func = this 5785 comments = func.comments if isinstance(func, exp.Expression) else None 5786 5787 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5788 self._match(TokenType.WHERE) 5789 this = self.expression( 5790 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5791 ) 5792 self._match_r_paren() 5793 5794 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5795 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5796 if self._match_text_seq("WITHIN", "GROUP"): 5797 order = self._parse_wrapped(self._parse_order) 5798 this = self.expression(exp.WithinGroup, this=this, expression=order) 5799 5800 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5801 # Some dialects choose to implement and some do not. 5802 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5803 5804 # There is some code above in _parse_lambda that handles 5805 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5806 5807 # The below changes handle 5808 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5809 5810 # Oracle allows both formats 5811 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5812 # and Snowflake chose to do the same for familiarity 5813 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5814 if isinstance(this, exp.AggFunc): 5815 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5816 5817 if ignore_respect and ignore_respect is not this: 5818 ignore_respect.replace(ignore_respect.this) 5819 this = self.expression(ignore_respect.__class__, this=this) 5820 5821 this = self._parse_respect_or_ignore_nulls(this) 5822 5823 # bigquery select from window x AS (partition by ...) 
5824 if alias: 5825 over = None 5826 self._match(TokenType.ALIAS) 5827 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5828 return this 5829 else: 5830 over = self._prev.text.upper() 5831 5832 if comments and isinstance(func, exp.Expression): 5833 func.pop_comments() 5834 5835 if not self._match(TokenType.L_PAREN): 5836 return self.expression( 5837 exp.Window, 5838 comments=comments, 5839 this=this, 5840 alias=self._parse_id_var(False), 5841 over=over, 5842 ) 5843 5844 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5845 5846 first = self._match(TokenType.FIRST) 5847 if self._match_text_seq("LAST"): 5848 first = False 5849 5850 partition, order = self._parse_partition_and_order() 5851 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5852 5853 if kind: 5854 self._match(TokenType.BETWEEN) 5855 start = self._parse_window_spec() 5856 self._match(TokenType.AND) 5857 end = self._parse_window_spec() 5858 5859 spec = self.expression( 5860 exp.WindowSpec, 5861 kind=kind, 5862 start=start["value"], 5863 start_side=start["side"], 5864 end=end["value"], 5865 end_side=end["side"], 5866 ) 5867 else: 5868 spec = None 5869 5870 self._match_r_paren() 5871 5872 window = self.expression( 5873 exp.Window, 5874 comments=comments, 5875 this=this, 5876 partition_by=partition, 5877 order=order, 5878 spec=spec, 5879 alias=window_alias, 5880 over=over, 5881 first=first, 5882 ) 5883 5884 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        """Parse the PARTITION BY list and the ORDER BY clause inside an OVER (...)."""
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one window-frame bound (e.g. UNBOUNDED PRECEDING, CURRENT ROW, <expr> FOLLOWING)."""
        # BETWEEN is optional here; the caller may already have consumed it.
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            # Frame side keyword (one of self.WINDOW_SIDES), or False when absent.
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an (optionally AS-prefixed) alias for `this`.

        Args:
            this: The expression being aliased.
            explicit: When True, only parse an alias if the AS keyword is present.

        Returns:
            An exp.Alias / exp.Aliases wrapping `this`, or `this` unchanged.
        """
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        # Multi-alias form: expr AS (a, b, c)
        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier-like token into an exp.Identifier.

        Args:
            any_token: When True, any non-reserved token is accepted as an identifier.
            tokens: Token types to accept; defaults to self.ID_VAR_TOKENS.
        """
        expression = self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            # String tokens used as identifiers are treated as quoted.
            quoted = self._prev.token_type == TokenType.STRING
            expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal, falling back to a placeholder."""
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        """Parse a string literal and return it as a quoted identifier (or None)."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal, falling back to a placeholder."""
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse an explicitly quoted identifier token, falling back to a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a VAR-like token into an exp.Var, optionally upper-casing its text."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        """Consume and return the current token unless it is reserved (or None at end)."""
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]:
        """Parse a string literal, or failing that any token as an exp.Var."""
        return self._parse_string() or self._parse_var(any_token=True, upper=upper)

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression, or failing that any token as an exp.Var."""
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal, falling back to a placeholder."""
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse a TRUE/FALSE literal, falling back to a placeholder."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a star (*) token, falling back to a placeholder."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        """Parse the name part of a parameter marker into an exp.Parameter."""
        this = self._parse_identifier() or self._parse_primary_or_var()
        return self.expression(exp.Parameter, this=this)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Parse a placeholder (e.g. ?, :name); rewinds if the sub-parser yields nothing."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # The sub-parser consumed the token but produced nothing: put it back.
            self._advance(-1)
        return None

    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        """Parse `KEYWORD expr` or `KEYWORD (expr, ...)` (e.g. EXCEPT/REPLACE after *)."""
        if not self._match_texts(keywords):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_expression()
        return [expression] if expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-delimited list of items produced by `parse_method`."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Attach comments that followed the separator to the previous item.
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Parse a left-associative chain of binary operators drawn from `expressions`."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        """Parse a parenthesized, comma-separated list of identifiers."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        """Parse a parenthesized, `sep`-separated list of items."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method`, consuming surrounding parens.

        Raises a parse error if '(' is missing and `optional` is False.
        """
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        """Parse a comma-separated list of expressions."""
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a SELECT statement or, failing that, a scalar expression."""
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_assignment()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT part of a DDL statement (e.g. CREATE TABLE ... AS SELECT)."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )
    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """Parse a BEGIN/START TRANSACTION statement, including its mode list."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        # Modes are comma-separated groups of VAR tokens, e.g. ISOLATION LEVEL ...
        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse a COMMIT or ROLLBACK statement (the keyword was already consumed)."""
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        # AND [NO] CHAIN
        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        """Parse a REFRESH [TABLE] statement."""
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse one ADD [COLUMN] clause of an ALTER TABLE statement."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse one DROP [COLUMN] clause of an ALTER TABLE statement."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            # Default the drop kind to COLUMN when _parse_drop didn't set one.
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        """Parse a DROP PARTITION clause of an ALTER TABLE statement."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        """Parse the ADD action of an ALTER TABLE statement (constraints or columns)."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            return self._parse_csv(
                lambda: self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            )

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)
        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        """Parse the ALTER action of an ALTER TABLE statement."""
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )
        # Fallback: ALTER COLUMN ... [SET DATA] TYPE <type> [COLLATE ...] [USING ...]
        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )

    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        """Parse an ALTER DISTSTYLE action (Redshift): ALL/EVEN/AUTO or KEY DISTKEY <col>."""
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        """Parse an ALTER [COMPOUND] SORTKEY action (Redshift)."""
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        """Parse the DROP action of an ALTER TABLE statement (partitions or columns)."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]:
        """Parse the RENAME action of an ALTER TABLE statement (column or table)."""
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))
    def _parse_alter_table_set(self) -> exp.AlterSet:
        """Parse the SET action of an ALTER TABLE statement across several dialects."""
        alter_set = self.expression(exp.AlterSet)

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_assignment))
        else:
            # Fallback: optional SERDE name followed by a property list.
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            alter_set.set("expressions", [self._parse_properties()])

        return alter_set

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        """Parse an ALTER TABLE statement, falling back to a raw exp.Command."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            options = self._parse_csv(self._parse_property)

            # Only build an AlterTable if all tokens were consumed; otherwise fall back.
            if not self._curr and actions:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        """Parse a MERGE INTO ... USING ... ON ... statement."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        """Parse the WHEN [NOT] MATCHED ... THEN ... clauses of a MERGE statement."""
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source: True for BY SOURCE, False for BY TARGET or when neither appears.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse a SHOW statement via SHOW_PARSERS, falling back to a raw command."""
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one `name = value` (or `name TO value`) item of a SET statement."""
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        if isinstance(right, (exp.Column, exp.Identifier)):
            # Bare names on the right-hand side are values, not column references.
            right = exp.var(right.name)

        this = self.expression(exp.EQ, this=left, expression=right)
        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL|SESSION] TRANSACTION <characteristics>."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse one item of a SET statement via SET_PARSERS, defaulting to assignment."""
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        """Parse a SET statement, falling back to a raw command on leftover tokens."""
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        """Parse a (possibly multi-word) option keyword described by `options`.

        Args:
            options: Mapping of leading keyword to allowed keyword continuations.
            raise_unmatched: When True, raise on an unknown option instead of rewinding.
        """
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume all remaining tokens and wrap them in a raw exp.Command."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        # The command keyword itself becomes `this`; the rest is the expression.
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])
    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse a dictionary property `this(kind(key value, ...))` (ClickHouse-style)."""
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parse a dictionary range `this(MIN x MAX y)` or `this(MAX y)` (MIN defaults to 0)."""
        # NOTE(review): locals shadow the builtins min/max; kept as-is to avoid a code change.
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        """Parse `expr IN iterator [IF condition]` as a comprehension, or rewind."""
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        """Parse a heredoc string: either a HEREDOC_STRING token or a $tag$...$tag$ body."""
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        # Scan forward until the opening tag sequence reappears.
        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Look up a (possibly multi-word) keyword parser via a trie; rewinds on failure."""
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        # Match (and by default consume) the current token if it has `token_type`;
        # attaches trailing comments to `expression` when given.
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
                self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        # Match (and by default consume) the current token if its type is in `types`.
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        # Match (and by default consume) the next two tokens as the pair (a, b).
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require a '(' token, raising a parse error if absent."""
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require a ')' token, raising a parse error if absent."""
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        # Match (and by default consume) the current token if its upper-cased text is in `texts`.
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        # Match a sequence of upper-cased token texts; rewinds fully on any mismatch.
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Replace lambda-parameter column references inside `node` with their identifiers.

        Parameters that carry a `to` type are wrapped in a Cast to that type.
        """
        if not node:
            return node

        # Map each lambda parameter name to its target type (False = no cast).
        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable | exp.Expression]:
        """Parse a TRUNCATE [TABLE|DATABASE] statement, or a TRUNCATE(...) function call."""
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        """Parse an ordered expression optionally followed by `WITH <operator>`."""
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)

    def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse an options list of the form `= (key = value, ...)`."""
        self._match(TokenType.EQ)
        self._match(TokenType.L_PAREN)

        opts: t.List[t.Optional[exp.Expression]] = []
        while self._curr and not self._match(TokenType.R_PAREN):
            if self._match_text_seq("FORMAT_NAME", "="):
                # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL,
                # so we parse it separately to use _parse_field()
                prop = self.expression(
                    exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field()
                )
                opts.append(prop)
            else:
                opts.append(self._parse_property())

            self._match(TokenType.COMMA)

        return opts

    def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
        """Parse the parameter list of a COPY statement (dialect-dependent separators)."""
        sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

        options = []
        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
            option = self._parse_var(any_token=True)
            prev = self._prev.text.upper()

            # Different dialects might separate options and values by white space, "=" and "AS"
            self._match(TokenType.EQ)
            self._match(TokenType.ALIAS)

            param = self.expression(exp.CopyParameter, this=option)

            if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
                TokenType.L_PAREN, advance=False
            ):
                # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
                param.set("expressions", self._parse_wrapped_options())
            elif prev == "FILE_FORMAT":
                # T-SQL's external file format case
                param.set("expression", self._parse_field())
            else:
                param.set("expression", self._parse_unquoted_field())

            options.append(param)
            self._match(sep)

        return options

    def _parse_credentials(self) -> t.Optional[exp.Credentials]:
        """Parse COPY credential clauses (storage integration, credentials, encryption, ...)."""
        expr = self.expression(exp.Credentials)

        if self._match_text_seq("STORAGE_INTEGRATION", "="):
            expr.set("storage", self._parse_field())
        if self._match_text_seq("CREDENTIALS"):
            # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
            creds = (
                self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
            )
            expr.set("credentials", creds)
        if self._match_text_seq("ENCRYPTION"):
            expr.set("encryption", self._parse_wrapped_options())
        if self._match_text_seq("IAM_ROLE"):
            expr.set("iam_role", self._parse_field())
        if self._match_text_seq("REGION"):
            expr.set("region", self._parse_field())

        return expr

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        """Parse a single file location in a COPY statement."""
        return self._parse_field()

    def _parse_copy(self) -> exp.Copy | exp.Command:
        """Parse a COPY INTO/FROM/TO statement, falling back to a raw command."""
        start = self._prev

        self._match(TokenType.INTO)

        this = (
            self._parse_select(nested=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table(schema=True)
        )

        # kind is True for COPY ... FROM (load), False for COPY ... TO (unload).
        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_file_location)
        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        """Initialize the parser.

        Args:
            error_level: The desired error level; defaults to ErrorLevel.IMMEDIATE.
            error_message_context: Number of characters of query context shown in errors.
            max_errors: Maximum error messages in a raised ParseError (ErrorLevel.RAISE only).
            dialect: The SQL dialect (name, instance, or None for the default).
        """
        # Imported here rather than at module level, presumably to avoid a circular
        # import between parser and dialects — TODO confirm.
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()
1274 def parse( 1275 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1276 ) -> t.List[t.Optional[exp.Expression]]: 1277 """ 1278 Parses a list of tokens and returns a list of syntax trees, one tree 1279 per parsed SQL statement. 1280 1281 Args: 1282 raw_tokens: The list of tokens. 1283 sql: The original SQL string, used to produce helpful debug messages. 1284 1285 Returns: 1286 The list of the produced syntax trees. 1287 """ 1288 return self._parse( 1289 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1290 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
1292 def parse_into( 1293 self, 1294 expression_types: exp.IntoType, 1295 raw_tokens: t.List[Token], 1296 sql: t.Optional[str] = None, 1297 ) -> t.List[t.Optional[exp.Expression]]: 1298 """ 1299 Parses a list of tokens into a given Expression type. If a collection of Expression 1300 types is given instead, this method will try to parse the token list into each one 1301 of them, stopping at the first for which the parsing succeeds. 1302 1303 Args: 1304 expression_types: The expression type(s) to try and parse the token list into. 1305 raw_tokens: The list of tokens. 1306 sql: The original SQL string, used to produce helpful debug messages. 1307 1308 Returns: 1309 The target Expression. 1310 """ 1311 errors = [] 1312 for expression_type in ensure_list(expression_types): 1313 parser = self.EXPRESSION_PARSERS.get(expression_type) 1314 if not parser: 1315 raise TypeError(f"No parser registered for {expression_type}") 1316 1317 try: 1318 return self._parse(parser, raw_tokens, sql) 1319 except ParseError as e: 1320 e.errors[0]["into_expression"] = expression_type 1321 errors.append(e) 1322 1323 raise ParseError( 1324 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1325 errors=merge_errors(errors), 1326 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1366 def check_errors(self) -> None: 1367 """Logs or raises any found errors, depending on the chosen error level setting.""" 1368 if self.error_level == ErrorLevel.WARN: 1369 for error in self.errors: 1370 logger.error(str(error)) 1371 elif self.error_level == ErrorLevel.RAISE and self.errors: 1372 raise ParseError( 1373 concat_messages(self.errors, self.max_errors), 1374 errors=merge_errors(self.errors), 1375 )
Logs or raises any found errors, depending on the chosen error level setting.
1377 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1378 """ 1379 Appends an error in the list of recorded errors or raises it, depending on the chosen 1380 error level setting. 1381 """ 1382 token = token or self._curr or self._prev or Token.string("") 1383 start = token.start 1384 end = token.end + 1 1385 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1386 highlight = self.sql[start:end] 1387 end_context = self.sql[end : end + self.error_message_context] 1388 1389 error = ParseError.new( 1390 f"{message}. Line {token.line}, Col: {token.col}.\n" 1391 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1392 description=message, 1393 line=token.line, 1394 col=token.col, 1395 start_context=start_context, 1396 highlight=highlight, 1397 end_context=end_context, 1398 ) 1399 1400 if self.error_level == ErrorLevel.IMMEDIATE: 1401 raise error 1402 1403 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1405 def expression( 1406 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1407 ) -> E: 1408 """ 1409 Creates a new, validated Expression. 1410 1411 Args: 1412 exp_class: The expression class to instantiate. 1413 comments: An optional list of comments to attach to the expression. 1414 kwargs: The arguments to set for the expression along with their respective values. 1415 1416 Returns: 1417 The target expression. 1418 """ 1419 instance = exp_class(**kwargs) 1420 instance.add_comments(comments) if comments else self._add_comments(instance) 1421 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1428 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1429 """ 1430 Validates an Expression, making sure that all its mandatory arguments are set. 1431 1432 Args: 1433 expression: The expression to validate. 1434 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1435 1436 Returns: 1437 The validated expression. 1438 """ 1439 if self.error_level != ErrorLevel.IGNORE: 1440 for error_message in expression.error_messages(args): 1441 self.raise_error(error_message) 1442 1443 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.