sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)
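
# Illustrative sketch (not part of the original module): the wrapping above is
# what keeps operator precedence intact when MOD is transpiled to %, e.g.
#
#   >>> import sqlglot
#   >>> sqlglot.parse_one("SELECT MOD(a + 1, 7)").sql()
#   'SELECT (a + 1) % 7'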


def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True):
    return exp.Trim(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position="LEADING" if is_left else "TRAILING",
    )


def build_coalesce(args: t.List, is_nvl: t.Optional[bool] = None) -> exp.Coalesce:
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl)


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """
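
    # Illustrative usage sketch (not part of the original source), assuming a
    # standard sqlglot install: a Parser consumes tokens produced by the
    # Tokenizer, and error_level decides whether problems raise or accumulate.
    #
    #   >>> from sqlglot.tokens import Tokenizer
    #   >>> sql = "SELECT a FROM t"
    #   >>> Parser(error_level=ErrorLevel.RAISE).parse(Tokenizer().tokenize(sql), sql)
    #   [Select(...)]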

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "CHAR": lambda args: exp.Chr(expressions=args),
        "CHR": lambda args: exp.Chr(expressions=args),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "INSTR": lambda args: exp.StrPosition(this=seq_get(args, 0), substr=seq_get(args, 1)),
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }
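
    # Illustrative note (not part of the original source): NO_PAREN_FUNCTIONS
    # lets these keywords parse as function nodes without parentheses, e.g.
    #
    #   >>> import sqlglot
    #   >>> type(sqlglot.parse_one("SELECT CURRENT_DATE").selects[0])
    #   <class 'sqlglot.expressions.CurrentDate'>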

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.DECIMAL256,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.POINT,
        TokenType.RING,
        TokenType.LINESTRING,
        TokenType.MULTILINESTRING,
        TokenType.POLYGON,
        TokenType.MULTIPOLYGON,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.MODEL,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
        TokenType.STREAMLIT,
        TokenType.SINK,
        TokenType.SOURCE,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.ATTACH,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DETACH,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
    ID_VAR_TOKENS.remove(TokenType.UNION)

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}
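
    # Illustrative note (not part of the original source): TRIM_TYPES are the
    # position keywords understood by _parse_trim, e.g.
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> sqlglot.parse_one("SELECT TRIM(LEADING 'x' FROM y)").find(exp.Trim).args["position"]
    #   'LEADING'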

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }
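
    # Illustrative note (not part of the original source): LAMBDAS handles the
    # arrow syntax of higher-order functions, so in a dialect that supports it
    # (e.g. Spark), "transform(xs, x -> x + 1)" should yield an exp.Lambda node
    # for the "x -> x + 1" argument.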

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
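
    # Illustrative note (not part of the original source): these operators bind
    # at column precedence, e.g.
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> sqlglot.parse_one("SELECT x::INT").find(exp.Cast).sql()
    #   'CAST(x AS INT)'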

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Hint: lambda self: self._parse_hint_body(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: exp.Semicolon(),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
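
    # Illustrative note (not part of the original source): RANGE_PARSERS extend
    # an already-parsed operand with a postfix predicate, e.g.
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> sqlglot.parse_one("SELECT a BETWEEN 1 AND 2").find(exp.Between) is not None
    #   True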

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
        "DUPLICATE": lambda self: self._parse_duplicate(),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EMPTY": lambda self: self.expression(exp.EmptyProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SECURITY": lambda self: self._parse_security(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WATERMARK": lambda self: self.expression(
            exp.WatermarkColumnConstraint,
            this=self._match(TokenType.FOR) and self._parse_column(),
            expression=self._match(TokenType.ALIAS) and self._parse_disjunction(),
        ),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "SWAP": lambda self: self.expression(
            exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
        ),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
        "WATERMARK",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "NORMALIZE": lambda self: self._parse_normalize(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "OVERLAY": lambda self: self._parse_overlay(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

    EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    OPERATION_MODIFIERS: t.Set[str] = set()

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
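        # Illustrative note (not part of the original source): this method is
        # what powers e.g. sqlglot.parse_one(sql, into=exp.Select), which tries
        # each candidate expression type until one parses successfully.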
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if token.comments:
                    chunks.append([token])

                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            comments = self._prev_comments
            stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
            stmt.add_comments(comments, prepend=True)
            return stmt

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
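
    # Illustrative note (not part of the original source): _parse_statement runs
    # once per semicolon-separated chunk produced by _parse, so assuming a
    # standard install, sqlglot.parse("SELECT 1; SELECT 2") yields two trees.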
1743 1744 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1745 start = self._prev 1746 temporary = self._match(TokenType.TEMPORARY) 1747 materialized = self._match_text_seq("MATERIALIZED") 1748 1749 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1750 if not kind: 1751 return self._parse_as_command(start) 1752 1753 concurrently = self._match_text_seq("CONCURRENTLY") 1754 if_exists = exists or self._parse_exists() 1755 1756 if kind == "COLUMN": 1757 this = self._parse_column() 1758 else: 1759 this = self._parse_table_parts( 1760 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1761 ) 1762 1763 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1764 1765 if self._match(TokenType.L_PAREN, advance=False): 1766 expressions = self._parse_wrapped_csv(self._parse_types) 1767 else: 1768 expressions = None 1769 1770 return self.expression( 1771 exp.Drop, 1772 exists=if_exists, 1773 this=this, 1774 expressions=expressions, 1775 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1776 temporary=temporary, 1777 materialized=materialized, 1778 cascade=self._match_text_seq("CASCADE"), 1779 constraints=self._match_text_seq("CONSTRAINTS"), 1780 purge=self._match_text_seq("PURGE"), 1781 cluster=cluster, 1782 concurrently=concurrently, 1783 ) 1784 1785 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1786 return ( 1787 self._match_text_seq("IF") 1788 and (not not_ or self._match(TokenType.NOT)) 1789 and self._match(TokenType.EXISTS) 1790 ) 1791 1792 def _parse_create(self) -> exp.Create | exp.Command: 1793 # Note: this can't be None because we've matched a statement parser 1794 start = self._prev 1795 1796 replace = ( 1797 start.token_type == TokenType.REPLACE 1798 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1799 or self._match_pair(TokenType.OR, TokenType.ALTER) 1800 ) 1801 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1802 1803 unique = self._match(TokenType.UNIQUE) 1804 1805 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1806 clustered = True 1807 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1808 "COLUMNSTORE" 1809 ): 1810 clustered = False 1811 else: 1812 clustered = None 1813 1814 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1815 self._advance() 1816 1817 properties = None 1818 create_token = self._match_set(self.CREATABLES) and self._prev 1819 1820 if not create_token: 1821 # exp.Properties.Location.POST_CREATE 1822 properties = self._parse_properties() 1823 create_token = self._match_set(self.CREATABLES) and self._prev 1824 1825 if not properties or not create_token: 1826 return self._parse_as_command(start) 1827 1828 concurrently = self._match_text_seq("CONCURRENTLY") 1829 exists = self._parse_exists(not_=True) 1830 this = None 1831 expression: t.Optional[exp.Expression] = None 1832 indexes = None 1833 no_schema_binding = None 1834 begin = None 1835 end = None 1836 clone = None 1837 1838 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1839 nonlocal properties 1840 if properties and temp_props: 1841 properties.expressions.extend(temp_props.expressions) 1842 elif temp_props: 1843 properties = temp_props 1844 1845 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1846 this = self._parse_user_defined_function(kind=create_token.token_type) 1847 1848 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1849 
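# For illustration (a hedged, dialect-dependent example): in
# CREATE FUNCTION add(a INT, b INT) RETURNS INT AS 'SELECT a + b',
# the "(a INT, b INT)" part is the type signature parsed above, while the
# RETURNS clause is among the properties collected by the following
# extend_props call.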
extend_props(self._parse_properties()) 1850 1851 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1852 extend_props(self._parse_properties()) 1853 1854 if not expression: 1855 if self._match(TokenType.COMMAND): 1856 expression = self._parse_as_command(self._prev) 1857 else: 1858 begin = self._match(TokenType.BEGIN) 1859 return_ = self._match_text_seq("RETURN") 1860 1861 if self._match(TokenType.STRING, advance=False): 1862 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1863 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1864 expression = self._parse_string() 1865 extend_props(self._parse_properties()) 1866 else: 1867 expression = self._parse_user_defined_function_expression() 1868 1869 end = self._match_text_seq("END") 1870 1871 if return_: 1872 expression = self.expression(exp.Return, this=expression) 1873 elif create_token.token_type == TokenType.INDEX: 1874 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c) 1875 if not self._match(TokenType.ON): 1876 index = self._parse_id_var() 1877 anonymous = False 1878 else: 1879 index = None 1880 anonymous = True 1881 1882 this = self._parse_index(index=index, anonymous=anonymous) 1883 elif create_token.token_type in self.DB_CREATABLES: 1884 table_parts = self._parse_table_parts( 1885 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1886 ) 1887 1888 # exp.Properties.Location.POST_NAME 1889 self._match(TokenType.COMMA) 1890 extend_props(self._parse_properties(before=True)) 1891 1892 this = self._parse_schema(this=table_parts) 1893 1894 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1895 extend_props(self._parse_properties()) 1896 1897 self._match(TokenType.ALIAS) 1898 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1899 # exp.Properties.Location.POST_ALIAS 1900 extend_props(self._parse_properties()) 1901 1902 if create_token.token_type == TokenType.SEQUENCE: 1903 expression = self._parse_types() 1904 extend_props(self._parse_properties()) 1905 else: 1906 expression = self._parse_ddl_select() 1907 1908 if create_token.token_type == TokenType.TABLE: 1909 # exp.Properties.Location.POST_EXPRESSION 1910 extend_props(self._parse_properties()) 1911 1912 indexes = [] 1913 while True: 1914 index = self._parse_index() 1915 1916 # exp.Properties.Location.POST_INDEX 1917 extend_props(self._parse_properties()) 1918 if not index: 1919 break 1920 else: 1921 self._match(TokenType.COMMA) 1922 indexes.append(index) 1923 elif create_token.token_type == TokenType.VIEW: 1924 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1925 no_schema_binding = True 1926 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 1927 extend_props(self._parse_properties()) 1928 1929 shallow = self._match_text_seq("SHALLOW") 1930 1931 if self._match_texts(self.CLONE_KEYWORDS): 1932 copy = self._prev.text.lower() == "copy" 1933 clone = self.expression( 1934 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1935 ) 1936 1937 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1938 return self._parse_as_command(start) 1939 1940 create_kind_text = create_token.text.upper() 1941 return self.expression( 1942 exp.Create, 1943 this=this, 1944 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 1945 replace=replace, 1946 refresh=refresh, 1947 unique=unique, 1948 expression=expression,
1949 exists=exists, 1950 properties=properties, 1951 indexes=indexes, 1952 no_schema_binding=no_schema_binding, 1953 begin=begin, 1954 end=end, 1955 clone=clone, 1956 concurrently=concurrently, 1957 clustered=clustered, 1958 ) 1959 1960 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1961 seq = exp.SequenceProperties() 1962 1963 options = [] 1964 index = self._index 1965 1966 while self._curr: 1967 self._match(TokenType.COMMA) 1968 if self._match_text_seq("INCREMENT"): 1969 self._match_text_seq("BY") 1970 self._match_text_seq("=") 1971 seq.set("increment", self._parse_term()) 1972 elif self._match_text_seq("MINVALUE"): 1973 seq.set("minvalue", self._parse_term()) 1974 elif self._match_text_seq("MAXVALUE"): 1975 seq.set("maxvalue", self._parse_term()) 1976 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1977 self._match_text_seq("=") 1978 seq.set("start", self._parse_term()) 1979 elif self._match_text_seq("CACHE"): 1980 # T-SQL allows empty CACHE which is initialized dynamically 1981 seq.set("cache", self._parse_number() or True) 1982 elif self._match_text_seq("OWNED", "BY"): 1983 # "OWNED BY NONE" is the default 1984 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1985 else: 1986 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1987 if opt: 1988 options.append(opt) 1989 else: 1990 break 1991 1992 seq.set("options", options if options else None) 1993 return None if self._index == index else seq 1994 1995 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1996 # only used for teradata currently 1997 self._match(TokenType.COMMA) 1998 1999 kwargs = { 2000 "no": self._match_text_seq("NO"), 2001 "dual": self._match_text_seq("DUAL"), 2002 "before": self._match_text_seq("BEFORE"), 2003 "default": self._match_text_seq("DEFAULT"), 2004 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2005 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2006 "after": self._match_text_seq("AFTER"), 2007 "minimum": self._match_texts(("MIN", "MINIMUM")), 2008 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2009 } 2010 2011 if self._match_texts(self.PROPERTY_PARSERS): 2012 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2013 try: 2014 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2015 except TypeError: 2016 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2017 2018 return None 2019 2020 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2021 return self._parse_wrapped_csv(self._parse_property) 2022 2023 def _parse_property(self) -> t.Optional[exp.Expression]: 2024 if self._match_texts(self.PROPERTY_PARSERS): 2025 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2026 2027 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2028 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2029 2030 if self._match_text_seq("COMPOUND", "SORTKEY"): 2031 return self._parse_sortkey(compound=True) 2032 2033 if self._match_text_seq("SQL", "SECURITY"): 2034 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2035 2036 index = self._index 2037 key = self._parse_column() 2038 2039 if not self._match(TokenType.EQ): 2040 self._retreat(index) 2041 return self._parse_sequence_properties() 2042 2043 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2044 if isinstance(key, exp.Column): 2045 key = 
key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2046 2047 value = self._parse_bitwise() or self._parse_var(any_token=True) 2048 2049 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2050 if isinstance(value, exp.Column): 2051 value = exp.var(value.name) 2052 2053 return self.expression(exp.Property, this=key, value=value) 2054 2055 def _parse_stored(self) -> exp.FileFormatProperty: 2056 self._match(TokenType.ALIAS) 2057 2058 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2059 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2060 2061 return self.expression( 2062 exp.FileFormatProperty, 2063 this=( 2064 self.expression( 2065 exp.InputOutputFormat, input_format=input_format, output_format=output_format 2066 ) 2067 if input_format or output_format 2068 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2069 ), 2070 ) 2071 2072 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2073 field = self._parse_field() 2074 if isinstance(field, exp.Identifier) and not field.quoted: 2075 field = exp.var(field) 2076 2077 return field 2078 2079 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2080 self._match(TokenType.EQ) 2081 self._match(TokenType.ALIAS) 2082 2083 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2084 2085 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2086 properties = [] 2087 while True: 2088 if before: 2089 prop = self._parse_property_before() 2090 else: 2091 prop = self._parse_property() 2092 if not prop: 2093 break 2094 for p in ensure_list(prop): 2095 properties.append(p) 2096 2097 if properties: 2098 return self.expression(exp.Properties, expressions=properties) 2099 2100 return None 2101 2102 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2103 return self.expression( 2104 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2105 ) 2106 2107 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2108 if self._match_texts(("DEFINER", "INVOKER")): 2109 security_specifier = self._prev.text.upper() 2110 return self.expression(exp.SecurityProperty, this=security_specifier) 2111 return None 2112 2113 def _parse_settings_property(self) -> exp.SettingsProperty: 2114 return self.expression( 2115 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2116 ) 2117 2118 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2119 if self._index >= 2: 2120 pre_volatile_token = self._tokens[self._index - 2] 2121 else: 2122 pre_volatile_token = None 2123 2124 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2125 return exp.VolatileProperty() 2126 2127 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2128 2129 def _parse_retention_period(self) -> exp.Var: 2130 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2131 number = self._parse_number() 2132 number_str = f"{number} " if number else "" 2133 unit = self._parse_var(any_token=True) 2134 return exp.var(f"{number_str}{unit}") 2135 2136 def _parse_system_versioning_property( 2137 self, with_: bool = False 2138 ) -> exp.WithSystemVersioningProperty: 2139 self._match(TokenType.EQ) 2140 prop = self.expression( 2141 exp.WithSystemVersioningProperty, 2142 **{ # type: ignore 2143 "on": 
True, 2144 "with": with_, 2145 }, 2146 ) 2147 2148 if self._match_text_seq("OFF"): 2149 prop.set("on", False) 2150 return prop 2151 2152 self._match(TokenType.ON) 2153 if self._match(TokenType.L_PAREN): 2154 while self._curr and not self._match(TokenType.R_PAREN): 2155 if self._match_text_seq("HISTORY_TABLE", "="): 2156 prop.set("this", self._parse_table_parts()) 2157 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2158 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2159 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2160 prop.set("retention_period", self._parse_retention_period()) 2161 2162 self._match(TokenType.COMMA) 2163 2164 return prop 2165 2166 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2167 self._match(TokenType.EQ) 2168 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2169 prop = self.expression(exp.DataDeletionProperty, on=on) 2170 2171 if self._match(TokenType.L_PAREN): 2172 while self._curr and not self._match(TokenType.R_PAREN): 2173 if self._match_text_seq("FILTER_COLUMN", "="): 2174 prop.set("filter_column", self._parse_column()) 2175 elif self._match_text_seq("RETENTION_PERIOD", "="): 2176 prop.set("retention_period", self._parse_retention_period()) 2177 2178 self._match(TokenType.COMMA) 2179 2180 return prop 2181 2182 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2183 kind = "HASH" 2184 expressions: t.Optional[t.List[exp.Expression]] = None 2185 if self._match_text_seq("BY", "HASH"): 2186 expressions = self._parse_wrapped_csv(self._parse_id_var) 2187 elif self._match_text_seq("BY", "RANDOM"): 2188 kind = "RANDOM" 2189 2190 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2191 buckets: t.Optional[exp.Expression] = None 2192 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2193 buckets = self._parse_number() 2194 2195 return self.expression( 2196 exp.DistributedByProperty, 2197 expressions=expressions, 2198 kind=kind, 2199 buckets=buckets, 2200 order=self._parse_order(), 2201 ) 2202 2203 def _parse_duplicate(self) -> exp.DuplicateKeyProperty: 2204 self._match_text_seq("KEY") 2205 expressions = self._parse_wrapped_csv(self._parse_id_var, optional=False) 2206 return self.expression(exp.DuplicateKeyProperty, expressions=expressions) 2207 2208 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2209 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2210 prop = self._parse_system_versioning_property(with_=True) 2211 self._match_r_paren() 2212 return prop 2213 2214 if self._match(TokenType.L_PAREN, advance=False): 2215 return self._parse_wrapped_properties() 2216 2217 if self._match_text_seq("JOURNAL"): 2218 return self._parse_withjournaltable() 2219 2220 if self._match_texts(self.VIEW_ATTRIBUTES): 2221 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2222 2223 if self._match_text_seq("DATA"): 2224 return self._parse_withdata(no=False) 2225 elif self._match_text_seq("NO", "DATA"): 2226 return self._parse_withdata(no=True) 2227 2228 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2229 return self._parse_serde_properties(with_=True) 2230 2231 if self._match(TokenType.SCHEMA): 2232 return self.expression( 2233 exp.WithSchemaBindingProperty, 2234 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2235 ) 2236 2237 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2238 return self.expression( 2239 exp.WithProcedureOptions, 
expressions=self._parse_csv(self._parse_procedure_option) 2240 ) 2241 2242 if not self._next: 2243 return None 2244 2245 return self._parse_withisolatedloading() 2246 2247 def _parse_procedure_option(self) -> exp.Expression | None: 2248 if self._match_text_seq("EXECUTE", "AS"): 2249 return self.expression( 2250 exp.ExecuteAsProperty, 2251 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2252 or self._parse_string(), 2253 ) 2254 2255 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2256 2257 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2258 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2259 self._match(TokenType.EQ) 2260 2261 user = self._parse_id_var() 2262 self._match(TokenType.PARAMETER) 2263 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2264 2265 if not user or not host: 2266 return None 2267 2268 return exp.DefinerProperty(this=f"{user}@{host}") 2269 2270 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2271 self._match(TokenType.TABLE) 2272 self._match(TokenType.EQ) 2273 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2274 2275 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2276 return self.expression(exp.LogProperty, no=no) 2277 2278 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2279 return self.expression(exp.JournalProperty, **kwargs) 2280 2281 def _parse_checksum(self) -> exp.ChecksumProperty: 2282 self._match(TokenType.EQ) 2283 2284 on = None 2285 if self._match(TokenType.ON): 2286 on = True 2287 elif self._match_text_seq("OFF"): 2288 on = False 2289 2290 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2291 2292 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2293 return self.expression( 2294 exp.Cluster, 2295 expressions=( 2296 self._parse_wrapped_csv(self._parse_ordered) 2297 if wrapped 2298 else self._parse_csv(self._parse_ordered) 2299 ), 2300 ) 2301 2302 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2303 self._match_text_seq("BY") 2304 2305 self._match_l_paren() 2306 expressions = self._parse_csv(self._parse_column) 2307 self._match_r_paren() 2308 2309 if self._match_text_seq("SORTED", "BY"): 2310 self._match_l_paren() 2311 sorted_by = self._parse_csv(self._parse_ordered) 2312 self._match_r_paren() 2313 else: 2314 sorted_by = None 2315 2316 self._match(TokenType.INTO) 2317 buckets = self._parse_number() 2318 self._match_text_seq("BUCKETS") 2319 2320 return self.expression( 2321 exp.ClusteredByProperty, 2322 expressions=expressions, 2323 sorted_by=sorted_by, 2324 buckets=buckets, 2325 ) 2326 2327 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2328 if not self._match_text_seq("GRANTS"): 2329 self._retreat(self._index - 1) 2330 return None 2331 2332 return self.expression(exp.CopyGrantsProperty) 2333 2334 def _parse_freespace(self) -> exp.FreespaceProperty: 2335 self._match(TokenType.EQ) 2336 return self.expression( 2337 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2338 ) 2339 2340 def _parse_mergeblockratio( 2341 self, no: bool = False, default: bool = False 2342 ) -> exp.MergeBlockRatioProperty: 2343 if self._match(TokenType.EQ): 2344 return self.expression( 2345 exp.MergeBlockRatioProperty, 2346 this=self._parse_number(), 2347 percent=self._match(TokenType.PERCENT), 2348 ) 2349 2350 return self.expression(exp.MergeBlockRatioProperty, no=no, 
default=default) 2351 2352 def _parse_datablocksize( 2353 self, 2354 default: t.Optional[bool] = None, 2355 minimum: t.Optional[bool] = None, 2356 maximum: t.Optional[bool] = None, 2357 ) -> exp.DataBlocksizeProperty: 2358 self._match(TokenType.EQ) 2359 size = self._parse_number() 2360 2361 units = None 2362 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2363 units = self._prev.text 2364 2365 return self.expression( 2366 exp.DataBlocksizeProperty, 2367 size=size, 2368 units=units, 2369 default=default, 2370 minimum=minimum, 2371 maximum=maximum, 2372 ) 2373 2374 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2375 self._match(TokenType.EQ) 2376 always = self._match_text_seq("ALWAYS") 2377 manual = self._match_text_seq("MANUAL") 2378 never = self._match_text_seq("NEVER") 2379 default = self._match_text_seq("DEFAULT") 2380 2381 autotemp = None 2382 if self._match_text_seq("AUTOTEMP"): 2383 autotemp = self._parse_schema() 2384 2385 return self.expression( 2386 exp.BlockCompressionProperty, 2387 always=always, 2388 manual=manual, 2389 never=never, 2390 default=default, 2391 autotemp=autotemp, 2392 ) 2393 2394 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2395 index = self._index 2396 no = self._match_text_seq("NO") 2397 concurrent = self._match_text_seq("CONCURRENT") 2398 2399 if not self._match_text_seq("ISOLATED", "LOADING"): 2400 self._retreat(index) 2401 return None 2402 2403 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2404 return self.expression( 2405 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2406 ) 2407 2408 def _parse_locking(self) -> exp.LockingProperty: 2409 if self._match(TokenType.TABLE): 2410 kind = "TABLE" 2411 elif self._match(TokenType.VIEW): 2412 kind = "VIEW" 2413 elif self._match(TokenType.ROW): 2414 kind = "ROW" 2415 elif self._match_text_seq("DATABASE"): 2416 kind = "DATABASE" 2417 else: 2418 kind = None 2419 2420 if kind in ("DATABASE", "TABLE", "VIEW"): 2421 this = self._parse_table_parts() 2422 else: 2423 this = None 2424 2425 if self._match(TokenType.FOR): 2426 for_or_in = "FOR" 2427 elif self._match(TokenType.IN): 2428 for_or_in = "IN" 2429 else: 2430 for_or_in = None 2431 2432 if self._match_text_seq("ACCESS"): 2433 lock_type = "ACCESS" 2434 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2435 lock_type = "EXCLUSIVE" 2436 elif self._match_text_seq("SHARE"): 2437 lock_type = "SHARE" 2438 elif self._match_text_seq("READ"): 2439 lock_type = "READ" 2440 elif self._match_text_seq("WRITE"): 2441 lock_type = "WRITE" 2442 elif self._match_text_seq("CHECKSUM"): 2443 lock_type = "CHECKSUM" 2444 else: 2445 lock_type = None 2446 2447 override = self._match_text_seq("OVERRIDE") 2448 2449 return self.expression( 2450 exp.LockingProperty, 2451 this=this, 2452 kind=kind, 2453 for_or_in=for_or_in, 2454 lock_type=lock_type, 2455 override=override, 2456 ) 2457 2458 def _parse_partition_by(self) -> t.List[exp.Expression]: 2459 if self._match(TokenType.PARTITION_BY): 2460 return self._parse_csv(self._parse_assignment) 2461 return [] 2462 2463 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2464 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2465 if self._match_text_seq("MINVALUE"): 2466 return exp.var("MINVALUE") 2467 if self._match_text_seq("MAXVALUE"): 2468 return exp.var("MAXVALUE") 2469 return self._parse_bitwise() 2470 2471 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2472 expression = None 
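# The branches below cover Postgres-style partition bound specs, e.g.
# (illustrative; the leading FOR VALUES is consumed by the caller):
#   IN (1, 2)
#   FROM (MINVALUE) TO (10)
#   WITH (MODULUS 4, REMAINDER 0)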
2473 from_expressions = None 2474 to_expressions = None 2475 2476 if self._match(TokenType.IN): 2477 this = self._parse_wrapped_csv(self._parse_bitwise) 2478 elif self._match(TokenType.FROM): 2479 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2480 self._match_text_seq("TO") 2481 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2482 elif self._match_text_seq("WITH", "(", "MODULUS"): 2483 this = self._parse_number() 2484 self._match_text_seq(",", "REMAINDER") 2485 expression = self._parse_number() 2486 self._match_r_paren() 2487 else: 2488 self.raise_error("Failed to parse partition bound spec.") 2489 2490 return self.expression( 2491 exp.PartitionBoundSpec, 2492 this=this, 2493 expression=expression, 2494 from_expressions=from_expressions, 2495 to_expressions=to_expressions, 2496 ) 2497 2498 # https://www.postgresql.org/docs/current/sql-createtable.html 2499 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2500 if not self._match_text_seq("OF"): 2501 self._retreat(self._index - 1) 2502 return None 2503 2504 this = self._parse_table(schema=True) 2505 2506 if self._match(TokenType.DEFAULT): 2507 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2508 elif self._match_text_seq("FOR", "VALUES"): 2509 expression = self._parse_partition_bound_spec() 2510 else: 2511 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2512 2513 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2514 2515 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2516 self._match(TokenType.EQ) 2517 return self.expression( 2518 exp.PartitionedByProperty, 2519 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2520 ) 2521 2522 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2523 if self._match_text_seq("AND", "STATISTICS"): 2524 statistics = True 2525 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2526 statistics = False 2527 else: 2528 statistics = None 2529 2530 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2531 2532 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2533 if self._match_text_seq("SQL"): 2534 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2535 return None 2536 2537 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2538 if self._match_text_seq("SQL", "DATA"): 2539 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2540 return None 2541 2542 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2543 if self._match_text_seq("PRIMARY", "INDEX"): 2544 return exp.NoPrimaryIndexProperty() 2545 if self._match_text_seq("SQL"): 2546 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2547 return None 2548 2549 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2550 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2551 return exp.OnCommitProperty() 2552 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2553 return exp.OnCommitProperty(delete=True) 2554 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2555 2556 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2557 if self._match_text_seq("SQL", "DATA"): 2558 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2559 return None 2560 2561 def _parse_distkey(self) -> exp.DistKeyProperty: 2562 return self.expression(exp.DistKeyProperty, 
this=self._parse_wrapped(self._parse_id_var)) 2563 2564 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2565 table = self._parse_table(schema=True) 2566 2567 options = [] 2568 while self._match_texts(("INCLUDING", "EXCLUDING")): 2569 this = self._prev.text.upper() 2570 2571 id_var = self._parse_id_var() 2572 if not id_var: 2573 return None 2574 2575 options.append( 2576 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2577 ) 2578 2579 return self.expression(exp.LikeProperty, this=table, expressions=options) 2580 2581 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2582 return self.expression( 2583 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2584 ) 2585 2586 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2587 self._match(TokenType.EQ) 2588 return self.expression( 2589 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2590 ) 2591 2592 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2593 self._match_text_seq("WITH", "CONNECTION") 2594 return self.expression( 2595 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2596 ) 2597 2598 def _parse_returns(self) -> exp.ReturnsProperty: 2599 value: t.Optional[exp.Expression] 2600 null = None 2601 is_table = self._match(TokenType.TABLE) 2602 2603 if is_table: 2604 if self._match(TokenType.LT): 2605 value = self.expression( 2606 exp.Schema, 2607 this="TABLE", 2608 expressions=self._parse_csv(self._parse_struct_types), 2609 ) 2610 if not self._match(TokenType.GT): 2611 self.raise_error("Expecting >") 2612 else: 2613 value = self._parse_schema(exp.var("TABLE")) 2614 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2615 null = True 2616 value = None 2617 else: 2618 value = self._parse_types() 2619 2620 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2621 2622 def _parse_describe(self) -> exp.Describe: 2623 kind = self._match_set(self.CREATABLES) and self._prev.text 2624 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2625 if self._match(TokenType.DOT): 2626 style = None 2627 self._retreat(self._index - 2) 2628 2629 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2630 2631 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2632 this = self._parse_statement() 2633 else: 2634 this = self._parse_table(schema=True) 2635 2636 properties = self._parse_properties() 2637 expressions = properties.expressions if properties else None 2638 partition = self._parse_partition() 2639 return self.expression( 2640 exp.Describe, 2641 this=this, 2642 style=style, 2643 kind=kind, 2644 expressions=expressions, 2645 partition=partition, 2646 format=format, 2647 ) 2648 2649 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2650 kind = self._prev.text.upper() 2651 expressions = [] 2652 2653 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2654 if self._match(TokenType.WHEN): 2655 expression = self._parse_disjunction() 2656 self._match(TokenType.THEN) 2657 else: 2658 expression = None 2659 2660 else_ = self._match(TokenType.ELSE) 2661 2662 if not self._match(TokenType.INTO): 2663 return None 2664 2665 return self.expression( 2666 exp.ConditionalInsert, 2667 this=self.expression( 2668 exp.Insert, 2669 this=self._parse_table(schema=True), 2670 
expression=self._parse_derived_table_values(), 2671 ), 2672 expression=expression, 2673 else_=else_, 2674 ) 2675 2676 expression = parse_conditional_insert() 2677 while expression is not None: 2678 expressions.append(expression) 2679 expression = parse_conditional_insert() 2680 2681 return self.expression( 2682 exp.MultitableInserts, 2683 kind=kind, 2684 comments=comments, 2685 expressions=expressions, 2686 source=self._parse_table(), 2687 ) 2688 2689 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2690 comments = [] 2691 hint = self._parse_hint() 2692 overwrite = self._match(TokenType.OVERWRITE) 2693 ignore = self._match(TokenType.IGNORE) 2694 local = self._match_text_seq("LOCAL") 2695 alternative = None 2696 is_function = None 2697 2698 if self._match_text_seq("DIRECTORY"): 2699 this: t.Optional[exp.Expression] = self.expression( 2700 exp.Directory, 2701 this=self._parse_var_or_string(), 2702 local=local, 2703 row_format=self._parse_row_format(match_row=True), 2704 ) 2705 else: 2706 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2707 comments += ensure_list(self._prev_comments) 2708 return self._parse_multitable_inserts(comments) 2709 2710 if self._match(TokenType.OR): 2711 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2712 2713 self._match(TokenType.INTO) 2714 comments += ensure_list(self._prev_comments) 2715 self._match(TokenType.TABLE) 2716 is_function = self._match(TokenType.FUNCTION) 2717 2718 this = ( 2719 self._parse_table(schema=True, parse_partition=True) 2720 if not is_function 2721 else self._parse_function() 2722 ) 2723 2724 returning = self._parse_returning() 2725 2726 return self.expression( 2727 exp.Insert, 2728 comments=comments, 2729 hint=hint, 2730 is_function=is_function, 2731 this=this, 2732 stored=self._match_text_seq("STORED") and self._parse_stored(), 2733 by_name=self._match_text_seq("BY", "NAME"), 2734 exists=self._parse_exists(), 2735 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2736 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2737 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2738 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2739 conflict=self._parse_on_conflict(), 2740 returning=returning or self._parse_returning(), 2741 overwrite=overwrite, 2742 alternative=alternative, 2743 ignore=ignore, 2744 source=self._match(TokenType.TABLE) and self._parse_table(), 2745 ) 2746 2747 def _parse_kill(self) -> exp.Kill: 2748 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2749 2750 return self.expression( 2751 exp.Kill, 2752 this=self._parse_primary(), 2753 kind=kind, 2754 ) 2755 2756 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2757 conflict = self._match_text_seq("ON", "CONFLICT") 2758 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2759 2760 if not conflict and not duplicate: 2761 return None 2762 2763 conflict_keys = None 2764 constraint = None 2765 2766 if conflict: 2767 if self._match_text_seq("ON", "CONSTRAINT"): 2768 constraint = self._parse_id_var() 2769 elif self._match(TokenType.L_PAREN): 2770 conflict_keys = self._parse_csv(self._parse_id_var) 2771 self._match_r_paren() 2772 2773 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2774 if self._prev.token_type == TokenType.UPDATE: 2775 self._match(TokenType.SET) 2776 expressions = self._parse_csv(self._parse_equality) 2777 else: 2778 
expressions = None 2779 2780 return self.expression( 2781 exp.OnConflict, 2782 duplicate=duplicate, 2783 expressions=expressions, 2784 action=action, 2785 conflict_keys=conflict_keys, 2786 constraint=constraint, 2787 ) 2788 2789 def _parse_returning(self) -> t.Optional[exp.Returning]: 2790 if not self._match(TokenType.RETURNING): 2791 return None 2792 return self.expression( 2793 exp.Returning, 2794 expressions=self._parse_csv(self._parse_expression), 2795 into=self._match(TokenType.INTO) and self._parse_table_part(), 2796 ) 2797 2798 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2799 if not self._match(TokenType.FORMAT): 2800 return None 2801 return self._parse_row_format() 2802 2803 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2804 index = self._index 2805 with_ = with_ or self._match_text_seq("WITH") 2806 2807 if not self._match(TokenType.SERDE_PROPERTIES): 2808 self._retreat(index) 2809 return None 2810 return self.expression( 2811 exp.SerdeProperties, 2812 **{ # type: ignore 2813 "expressions": self._parse_wrapped_properties(), 2814 "with": with_, 2815 }, 2816 ) 2817 2818 def _parse_row_format( 2819 self, match_row: bool = False 2820 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2821 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2822 return None 2823 2824 if self._match_text_seq("SERDE"): 2825 this = self._parse_string() 2826 2827 serde_properties = self._parse_serde_properties() 2828 2829 return self.expression( 2830 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2831 ) 2832 2833 self._match_text_seq("DELIMITED") 2834 2835 kwargs = {} 2836 2837 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2838 kwargs["fields"] = self._parse_string() 2839 if self._match_text_seq("ESCAPED", "BY"): 2840 kwargs["escaped"] = self._parse_string() 2841 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2842 kwargs["collection_items"] = self._parse_string() 2843 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2844 kwargs["map_keys"] = self._parse_string() 2845 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2846 kwargs["lines"] = self._parse_string() 2847 if self._match_text_seq("NULL", "DEFINED", "AS"): 2848 kwargs["null"] = self._parse_string() 2849 2850 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2851 2852 def _parse_load(self) -> exp.LoadData | exp.Command: 2853 if self._match_text_seq("DATA"): 2854 local = self._match_text_seq("LOCAL") 2855 self._match_text_seq("INPATH") 2856 inpath = self._parse_string() 2857 overwrite = self._match(TokenType.OVERWRITE) 2858 self._match_pair(TokenType.INTO, TokenType.TABLE) 2859 2860 return self.expression( 2861 exp.LoadData, 2862 this=self._parse_table(schema=True), 2863 local=local, 2864 overwrite=overwrite, 2865 inpath=inpath, 2866 partition=self._parse_partition(), 2867 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2868 serde=self._match_text_seq("SERDE") and self._parse_string(), 2869 ) 2870 return self._parse_as_command(self._prev) 2871 2872 def _parse_delete(self) -> exp.Delete: 2873 # This handles MySQL's "Multiple-Table Syntax" 2874 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2875 tables = None 2876 if not self._match(TokenType.FROM, advance=False): 2877 tables = self._parse_csv(self._parse_table) or None 2878 2879 returning = self._parse_returning() 2880 2881 
return self.expression( 2882 exp.Delete, 2883 tables=tables, 2884 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2885 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2886 cluster=self._match(TokenType.ON) and self._parse_on_property(), 2887 where=self._parse_where(), 2888 returning=returning or self._parse_returning(), 2889 limit=self._parse_limit(), 2890 ) 2891 2892 def _parse_update(self) -> exp.Update: 2893 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2894 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2895 returning = self._parse_returning() 2896 return self.expression( 2897 exp.Update, 2898 **{ # type: ignore 2899 "this": this, 2900 "expressions": expressions, 2901 "from": self._parse_from(joins=True), 2902 "where": self._parse_where(), 2903 "returning": returning or self._parse_returning(), 2904 "order": self._parse_order(), 2905 "limit": self._parse_limit(), 2906 }, 2907 ) 2908 2909 def _parse_uncache(self) -> exp.Uncache: 2910 if not self._match(TokenType.TABLE): 2911 self.raise_error("Expecting TABLE after UNCACHE") 2912 2913 return self.expression( 2914 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2915 ) 2916 2917 def _parse_cache(self) -> exp.Cache: 2918 lazy = self._match_text_seq("LAZY") 2919 self._match(TokenType.TABLE) 2920 table = self._parse_table(schema=True) 2921 2922 options = [] 2923 if self._match_text_seq("OPTIONS"): 2924 self._match_l_paren() 2925 k = self._parse_string() 2926 self._match(TokenType.EQ) 2927 v = self._parse_string() 2928 options = [k, v] 2929 self._match_r_paren() 2930 2931 self._match(TokenType.ALIAS) 2932 return self.expression( 2933 exp.Cache, 2934 this=table, 2935 lazy=lazy, 2936 options=options, 2937 expression=self._parse_select(nested=True), 2938 ) 2939 2940 def _parse_partition(self) -> t.Optional[exp.Partition]: 2941 if not self._match(TokenType.PARTITION): 2942 return None 2943 2944 return self.expression( 2945 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2946 ) 2947 2948 def _parse_value(self) -> t.Optional[exp.Tuple]: 2949 def _parse_value_expression() -> t.Optional[exp.Expression]: 2950 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 2951 return exp.var(self._prev.text.upper()) 2952 return self._parse_expression() 2953 2954 if self._match(TokenType.L_PAREN): 2955 expressions = self._parse_csv(_parse_value_expression) 2956 self._match_r_paren() 2957 return self.expression(exp.Tuple, expressions=expressions) 2958 2959 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
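# For example (dialect support varies): VALUES (1), (2) takes the parenthesized
# path above, while a bare VALUES 1, 2 falls through to the code below, which
# wraps each expression in a single-column exp.Tuple row.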
2960 expression = self._parse_expression() 2961 if expression: 2962 return self.expression(exp.Tuple, expressions=[expression]) 2963 return None 2964 2965 def _parse_projections(self) -> t.List[exp.Expression]: 2966 return self._parse_expressions() 2967 2968 def _parse_select( 2969 self, 2970 nested: bool = False, 2971 table: bool = False, 2972 parse_subquery_alias: bool = True, 2973 parse_set_operation: bool = True, 2974 ) -> t.Optional[exp.Expression]: 2975 cte = self._parse_with() 2976 2977 if cte: 2978 this = self._parse_statement() 2979 2980 if not this: 2981 self.raise_error("Failed to parse any statement following CTE") 2982 return cte 2983 2984 if "with" in this.arg_types: 2985 this.set("with", cte) 2986 else: 2987 self.raise_error(f"{this.key} does not support CTE") 2988 this = cte 2989 2990 return this 2991 2992 # duckdb supports starting a query with a leading FROM, e.g. FROM x 2993 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2994 2995 if self._match(TokenType.SELECT): 2996 comments = self._prev_comments 2997 2998 hint = self._parse_hint() 2999 3000 if self._next and not self._next.token_type == TokenType.DOT: 3001 all_ = self._match(TokenType.ALL) 3002 distinct = self._match_set(self.DISTINCT_TOKENS) 3003 else: 3004 all_, distinct = None, None 3005 3006 kind = ( 3007 self._match(TokenType.ALIAS) 3008 and self._match_texts(("STRUCT", "VALUE")) 3009 and self._prev.text.upper() 3010 ) 3011 3012 if distinct: 3013 distinct = self.expression( 3014 exp.Distinct, 3015 on=self._parse_value() if self._match(TokenType.ON) else None, 3016 ) 3017 3018 if all_ and distinct: 3019 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3020 3021 operation_modifiers = [] 3022 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3023 operation_modifiers.append(exp.var(self._prev.text.upper())) 3024 3025 limit = self._parse_limit(top=True) 3026 projections = self._parse_projections() 3027 3028 this = self.expression( 3029 exp.Select, 3030 kind=kind, 3031 hint=hint, 3032 distinct=distinct, 3033 expressions=projections, 3034 limit=limit, 3035 operation_modifiers=operation_modifiers or None, 3036 ) 3037 this.comments = comments 3038 3039 into = self._parse_into() 3040 if into: 3041 this.set("into", into) 3042 3043 if not from_: 3044 from_ = self._parse_from() 3045 3046 if from_: 3047 this.set("from", from_) 3048 3049 this = self._parse_query_modifiers(this) 3050 elif (table or nested) and self._match(TokenType.L_PAREN): 3051 if self._match(TokenType.PIVOT): 3052 this = self._parse_simplified_pivot() 3053 elif self._match(TokenType.FROM): 3054 this = exp.select("*").from_( 3055 t.cast(exp.From, self._parse_from(skip_from_token=True)) 3056 ) 3057 else: 3058 this = ( 3059 self._parse_table() 3060 if table 3061 else self._parse_select(nested=True, parse_set_operation=False) 3062 ) 3063 3064 # Transform exp.Values into an exp.Table to pass through parse_query_modifiers 3065 # in case a modifier (e.g.
join) is following 3066 if table and isinstance(this, exp.Values) and this.alias: 3067 alias = this.args["alias"].pop() 3068 this = exp.Table(this=this, alias=alias) 3069 3070 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3071 3072 self._match_r_paren() 3073 3074 # We return early here so that the UNION isn't attached to the subquery by the 3075 # following call to _parse_set_operations, but instead becomes the parent node 3076 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3077 elif self._match(TokenType.VALUES, advance=False): 3078 this = self._parse_derived_table_values() 3079 elif from_: 3080 this = exp.select("*").from_(from_.this, copy=False) 3081 elif self._match(TokenType.SUMMARIZE): 3082 table = self._match(TokenType.TABLE) 3083 this = self._parse_select() or self._parse_string() or self._parse_table() 3084 return self.expression(exp.Summarize, this=this, table=table) 3085 elif self._match(TokenType.DESCRIBE): 3086 this = self._parse_describe() 3087 elif self._match_text_seq("STREAM"): 3088 this = self._parse_function() 3089 if this: 3090 this = self.expression(exp.Stream, this=this) 3091 else: 3092 self._retreat(self._index - 1) 3093 else: 3094 this = None 3095 3096 return self._parse_set_operations(this) if parse_set_operation else this 3097 3098 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3099 if not skip_with_token and not self._match(TokenType.WITH): 3100 return None 3101 3102 comments = self._prev_comments 3103 recursive = self._match(TokenType.RECURSIVE) 3104 3105 last_comments = None 3106 expressions = [] 3107 while True: 3108 expressions.append(self._parse_cte()) 3109 if last_comments: 3110 expressions[-1].add_comments(last_comments) 3111 3112 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3113 break 3114 else: 3115 self._match(TokenType.WITH) 3116 3117 last_comments = self._prev_comments 3118 3119 return self.expression( 3120 exp.With, comments=comments, expressions=expressions, recursive=recursive 3121 ) 3122 3123 def _parse_cte(self) -> exp.CTE: 3124 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3125 if not alias or not alias.this: 3126 self.raise_error("Expected CTE to have alias") 3127 3128 self._match(TokenType.ALIAS) 3129 comments = self._prev_comments 3130 3131 if self._match_text_seq("NOT", "MATERIALIZED"): 3132 materialized = False 3133 elif self._match_text_seq("MATERIALIZED"): 3134 materialized = True 3135 else: 3136 materialized = None 3137 3138 return self.expression( 3139 exp.CTE, 3140 this=self._parse_wrapped(self._parse_statement), 3141 alias=alias, 3142 materialized=materialized, 3143 comments=comments, 3144 ) 3145 3146 def _parse_table_alias( 3147 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3148 ) -> t.Optional[exp.TableAlias]: 3149 any_token = self._match(TokenType.ALIAS) 3150 alias = ( 3151 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3152 or self._parse_string_as_identifier() 3153 ) 3154 3155 index = self._index 3156 if self._match(TokenType.L_PAREN): 3157 columns = self._parse_csv(self._parse_function_parameter) 3158 self._match_r_paren() if columns else self._retreat(index) 3159 else: 3160 columns = None 3161 3162 if not alias and not columns: 3163 return None 3164 3165 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3166 3167 # We bubble up comments from the Identifier to the TableAlias 3168 if isinstance(alias, exp.Identifier): 3169 
table_alias.add_comments(alias.pop_comments()) 3170 3171 return table_alias 3172 3173 def _parse_subquery( 3174 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3175 ) -> t.Optional[exp.Subquery]: 3176 if not this: 3177 return None 3178 3179 return self.expression( 3180 exp.Subquery, 3181 this=this, 3182 pivots=self._parse_pivots(), 3183 alias=self._parse_table_alias() if parse_alias else None, 3184 sample=self._parse_table_sample(), 3185 ) 3186 3187 def _implicit_unnests_to_explicit(self, this: E) -> E: 3188 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3189 3190 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3191 for i, join in enumerate(this.args.get("joins") or []): 3192 table = join.this 3193 normalized_table = table.copy() 3194 normalized_table.meta["maybe_column"] = True 3195 normalized_table = _norm(normalized_table, dialect=self.dialect) 3196 3197 if isinstance(table, exp.Table) and not join.args.get("on"): 3198 if normalized_table.parts[0].name in refs: 3199 table_as_column = table.to_column() 3200 unnest = exp.Unnest(expressions=[table_as_column]) 3201 3202 # Table.to_column creates a parent Alias node that we want to convert to 3203 # a TableAlias and attach to the Unnest, so it matches the parser's output 3204 if isinstance(table.args.get("alias"), exp.TableAlias): 3205 table_as_column.replace(table_as_column.this) 3206 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3207 3208 table.replace(unnest) 3209 3210 refs.add(normalized_table.alias_or_name) 3211 3212 return this 3213 3214 def _parse_query_modifiers( 3215 self, this: t.Optional[exp.Expression] 3216 ) -> t.Optional[exp.Expression]: 3217 if isinstance(this, (exp.Query, exp.Table)): 3218 for join in self._parse_joins(): 3219 this.append("joins", join) 3220 for lateral in iter(self._parse_lateral, None): 3221 this.append("laterals", lateral) 3222 3223 while True: 3224 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3225 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3226 key, expression = parser(self) 3227 3228 if expression: 3229 this.set(key, expression) 3230 if key == "limit": 3231 offset = expression.args.pop("offset", None) 3232 3233 if offset: 3234 offset = exp.Offset(expression=offset) 3235 this.set("offset", offset) 3236 3237 limit_by_expressions = expression.expressions 3238 expression.set("expressions", None) 3239 offset.set("expressions", limit_by_expressions) 3240 continue 3241 break 3242 3243 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3244 this = self._implicit_unnests_to_explicit(this) 3245 3246 return this 3247 3248 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3249 start = self._curr 3250 while self._curr: 3251 self._advance() 3252 3253 end = self._tokens[self._index - 1] 3254 return exp.Hint(expressions=[self._find_sql(start, end)]) 3255 3256 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3257 return self._parse_function_call() 3258 3259 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3260 start_index = self._index 3261 should_fallback_to_string = False 3262 3263 hints = [] 3264 try: 3265 for hint in iter( 3266 lambda: self._parse_csv( 3267 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3268 ), 3269 [], 3270 ): 3271 hints.extend(hint) 3272 except ParseError: 3273 should_fallback_to_string = True 3274 3275 if should_fallback_to_string or self._curr: 3276 
self._retreat(start_index) 3277 return self._parse_hint_fallback_to_string() 3278 3279 return self.expression(exp.Hint, expressions=hints) 3280 3281 def _parse_hint(self) -> t.Optional[exp.Hint]: 3282 if self._match(TokenType.HINT) and self._prev_comments: 3283 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3284 3285 return None 3286 3287 def _parse_into(self) -> t.Optional[exp.Into]: 3288 if not self._match(TokenType.INTO): 3289 return None 3290 3291 temp = self._match(TokenType.TEMPORARY) 3292 unlogged = self._match_text_seq("UNLOGGED") 3293 self._match(TokenType.TABLE) 3294 3295 return self.expression( 3296 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3297 ) 3298 3299 def _parse_from( 3300 self, joins: bool = False, skip_from_token: bool = False 3301 ) -> t.Optional[exp.From]: 3302 if not skip_from_token and not self._match(TokenType.FROM): 3303 return None 3304 3305 return self.expression( 3306 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3307 ) 3308 3309 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3310 return self.expression( 3311 exp.MatchRecognizeMeasure, 3312 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3313 this=self._parse_expression(), 3314 ) 3315 3316 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3317 if not self._match(TokenType.MATCH_RECOGNIZE): 3318 return None 3319 3320 self._match_l_paren() 3321 3322 partition = self._parse_partition_by() 3323 order = self._parse_order() 3324 3325 measures = ( 3326 self._parse_csv(self._parse_match_recognize_measure) 3327 if self._match_text_seq("MEASURES") 3328 else None 3329 ) 3330 3331 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3332 rows = exp.var("ONE ROW PER MATCH") 3333 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3334 text = "ALL ROWS PER MATCH" 3335 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3336 text += " SHOW EMPTY MATCHES" 3337 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3338 text += " OMIT EMPTY MATCHES" 3339 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3340 text += " WITH UNMATCHED ROWS" 3341 rows = exp.var(text) 3342 else: 3343 rows = None 3344 3345 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3346 text = "AFTER MATCH SKIP" 3347 if self._match_text_seq("PAST", "LAST", "ROW"): 3348 text += " PAST LAST ROW" 3349 elif self._match_text_seq("TO", "NEXT", "ROW"): 3350 text += " TO NEXT ROW" 3351 elif self._match_text_seq("TO", "FIRST"): 3352 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3353 elif self._match_text_seq("TO", "LAST"): 3354 text += f" TO LAST {self._advance_any().text}" # type: ignore 3355 after = exp.var(text) 3356 else: 3357 after = None 3358 3359 if self._match_text_seq("PATTERN"): 3360 self._match_l_paren() 3361 3362 if not self._curr: 3363 self.raise_error("Expecting )", self._curr) 3364 3365 paren = 1 3366 start = self._curr 3367 3368 while self._curr and paren > 0: 3369 if self._curr.token_type == TokenType.L_PAREN: 3370 paren += 1 3371 if self._curr.token_type == TokenType.R_PAREN: 3372 paren -= 1 3373 3374 end = self._prev 3375 self._advance() 3376 3377 if paren > 0: 3378 self.raise_error("Expecting )", self._curr) 3379 3380 pattern = exp.var(self._find_sql(start, end)) 3381 else: 3382 pattern = None 3383 3384 define = ( 3385 self._parse_csv(self._parse_name_as_expression) 3386 if self._match_text_seq("DEFINE") 3387 else None 3388 ) 3389 3390 
self._match_r_paren() 3391 3392 return self.expression( 3393 exp.MatchRecognize, 3394 partition_by=partition, 3395 order=order, 3396 measures=measures, 3397 rows=rows, 3398 after=after, 3399 pattern=pattern, 3400 define=define, 3401 alias=self._parse_table_alias(), 3402 ) 3403 3404 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3405 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3406 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3407 cross_apply = False 3408 3409 if cross_apply is not None: 3410 this = self._parse_select(table=True) 3411 view = None 3412 outer = None 3413 elif self._match(TokenType.LATERAL): 3414 this = self._parse_select(table=True) 3415 view = self._match(TokenType.VIEW) 3416 outer = self._match(TokenType.OUTER) 3417 else: 3418 return None 3419 3420 if not this: 3421 this = ( 3422 self._parse_unnest() 3423 or self._parse_function() 3424 or self._parse_id_var(any_token=False) 3425 ) 3426 3427 while self._match(TokenType.DOT): 3428 this = exp.Dot( 3429 this=this, 3430 expression=self._parse_function() or self._parse_id_var(any_token=False), 3431 ) 3432 3433 if view: 3434 table = self._parse_id_var(any_token=False) 3435 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3436 table_alias: t.Optional[exp.TableAlias] = self.expression( 3437 exp.TableAlias, this=table, columns=columns 3438 ) 3439 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3440 # We move the alias from the lateral's child node to the lateral itself 3441 table_alias = this.args["alias"].pop() 3442 else: 3443 table_alias = self._parse_table_alias() 3444 3445 return self.expression( 3446 exp.Lateral, 3447 this=this, 3448 view=view, 3449 outer=outer, 3450 alias=table_alias, 3451 cross_apply=cross_apply, 3452 ) 3453 3454 def _parse_join_parts( 3455 self, 3456 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3457 return ( 3458 self._match_set(self.JOIN_METHODS) and self._prev, 3459 self._match_set(self.JOIN_SIDES) and self._prev, 3460 self._match_set(self.JOIN_KINDS) and self._prev, 3461 ) 3462 3463 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3464 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3465 this = self._parse_column() 3466 if isinstance(this, exp.Column): 3467 return this.this 3468 return this 3469 3470 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3471 3472 def _parse_join( 3473 self, skip_join_token: bool = False, parse_bracket: bool = False 3474 ) -> t.Optional[exp.Join]: 3475 if self._match(TokenType.COMMA): 3476 return self.expression(exp.Join, this=self._parse_table()) 3477 3478 index = self._index 3479 method, side, kind = self._parse_join_parts() 3480 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3481 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3482 3483 if not skip_join_token and not join: 3484 self._retreat(index) 3485 kind = None 3486 method = None 3487 side = None 3488 3489 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3490 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3491 3492 if not skip_join_token and not join and not outer_apply and not cross_apply: 3493 return None 3494 3495 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3496 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3497 kwargs["expressions"] = 
self._parse_csv( 3498 lambda: self._parse_table(parse_bracket=parse_bracket) 3499 ) 3500 3501 if method: 3502 kwargs["method"] = method.text 3503 if side: 3504 kwargs["side"] = side.text 3505 if kind: 3506 kwargs["kind"] = kind.text 3507 if hint: 3508 kwargs["hint"] = hint 3509 3510 if self._match(TokenType.MATCH_CONDITION): 3511 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3512 3513 if self._match(TokenType.ON): 3514 kwargs["on"] = self._parse_assignment() 3515 elif self._match(TokenType.USING): 3516 kwargs["using"] = self._parse_using_identifiers() 3517 elif ( 3518 not (outer_apply or cross_apply) 3519 and not isinstance(kwargs["this"], exp.Unnest) 3520 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3521 ): 3522 index = self._index 3523 joins: t.Optional[list] = list(self._parse_joins()) 3524 3525 if joins and self._match(TokenType.ON): 3526 kwargs["on"] = self._parse_assignment() 3527 elif joins and self._match(TokenType.USING): 3528 kwargs["using"] = self._parse_using_identifiers() 3529 else: 3530 joins = None 3531 self._retreat(index) 3532 3533 kwargs["this"].set("joins", joins if joins else None) 3534 3535 comments = [c for token in (method, side, kind) if token for c in token.comments] 3536 return self.expression(exp.Join, comments=comments, **kwargs) 3537 3538 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3539 this = self._parse_assignment() 3540 3541 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3542 return this 3543 3544 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3545 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3546 3547 return this 3548 3549 def _parse_index_params(self) -> exp.IndexParameters: 3550 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3551 3552 if self._match(TokenType.L_PAREN, advance=False): 3553 columns = self._parse_wrapped_csv(self._parse_with_operator) 3554 else: 3555 columns = None 3556 3557 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3558 partition_by = self._parse_partition_by() 3559 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3560 tablespace = ( 3561 self._parse_var(any_token=True) 3562 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3563 else None 3564 ) 3565 where = self._parse_where() 3566 3567 on = self._parse_field() if self._match(TokenType.ON) else None 3568 3569 return self.expression( 3570 exp.IndexParameters, 3571 using=using, 3572 columns=columns, 3573 include=include, 3574 partition_by=partition_by, 3575 where=where, 3576 with_storage=with_storage, 3577 tablespace=tablespace, 3578 on=on, 3579 ) 3580 3581 def _parse_index( 3582 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3583 ) -> t.Optional[exp.Index]: 3584 if index or anonymous: 3585 unique = None 3586 primary = None 3587 amp = None 3588 3589 self._match(TokenType.ON) 3590 self._match(TokenType.TABLE) # hive 3591 table = self._parse_table_parts(schema=True) 3592 else: 3593 unique = self._match(TokenType.UNIQUE) 3594 primary = self._match_text_seq("PRIMARY") 3595 amp = self._match_text_seq("AMP") 3596 3597 if not self._match(TokenType.INDEX): 3598 return None 3599 3600 index = self._parse_id_var() 3601 table = None 3602 3603 params = self._parse_index_params() 3604 3605 return self.expression( 3606 exp.Index, 3607 this=index, 3608 table=table, 3609 unique=unique, 3610 primary=primary, 3611 amp=amp, 3612 
params=params, 3613 ) 3614 3615 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3616 hints: t.List[exp.Expression] = [] 3617 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3618 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3619 hints.append( 3620 self.expression( 3621 exp.WithTableHint, 3622 expressions=self._parse_csv( 3623 lambda: self._parse_function() or self._parse_var(any_token=True) 3624 ), 3625 ) 3626 ) 3627 self._match_r_paren() 3628 else: 3629 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3630 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3631 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3632 3633 self._match_set((TokenType.INDEX, TokenType.KEY)) 3634 if self._match(TokenType.FOR): 3635 hint.set("target", self._advance_any() and self._prev.text.upper()) 3636 3637 hint.set("expressions", self._parse_wrapped_id_vars()) 3638 hints.append(hint) 3639 3640 return hints or None 3641 3642 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3643 return ( 3644 (not schema and self._parse_function(optional_parens=False)) 3645 or self._parse_id_var(any_token=False) 3646 or self._parse_string_as_identifier() 3647 or self._parse_placeholder() 3648 ) 3649 3650 def _parse_table_parts( 3651 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3652 ) -> exp.Table: 3653 catalog = None 3654 db = None 3655 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3656 3657 while self._match(TokenType.DOT): 3658 if catalog: 3659 # This allows nesting the table in arbitrarily many dot expressions if needed 3660 table = self.expression( 3661 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3662 ) 3663 else: 3664 catalog = db 3665 db = table 3666 # "" used for tsql FROM a..b case 3667 table = self._parse_table_part(schema=schema) or "" 3668 3669 if ( 3670 wildcard 3671 and self._is_connected() 3672 and (isinstance(table, exp.Identifier) or not table) 3673 and self._match(TokenType.STAR) 3674 ): 3675 if isinstance(table, exp.Identifier): 3676 table.args["this"] += "*" 3677 else: 3678 table = exp.Identifier(this="*") 3679 3680 # We bubble up comments from the Identifier to the Table 3681 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3682 3683 if is_db_reference: 3684 catalog = db 3685 db = table 3686 table = None 3687 3688 if not table and not is_db_reference: 3689 self.raise_error(f"Expected table name but got {self._curr}") 3690 if not db and is_db_reference: 3691 self.raise_error(f"Expected database name but got {self._curr}") 3692 3693 table = self.expression( 3694 exp.Table, 3695 comments=comments, 3696 this=table, 3697 db=db, 3698 catalog=catalog, 3699 ) 3700 3701 changes = self._parse_changes() 3702 if changes: 3703 table.set("changes", changes) 3704 3705 at_before = self._parse_historical_data() 3706 if at_before: 3707 table.set("when", at_before) 3708 3709 pivots = self._parse_pivots() 3710 if pivots: 3711 table.set("pivots", pivots) 3712 3713 return table 3714 3715 def _parse_table( 3716 self, 3717 schema: bool = False, 3718 joins: bool = False, 3719 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3720 parse_bracket: bool = False, 3721 is_db_reference: bool = False, 3722 parse_partition: bool = False, 3723 ) -> t.Optional[exp.Expression]: 3724 lateral = self._parse_lateral() 3725 if lateral: 3726 return lateral 3727 3728 unnest = 
self._parse_unnest() 3729 if unnest: 3730 return unnest 3731 3732 values = self._parse_derived_table_values() 3733 if values: 3734 return values 3735 3736 subquery = self._parse_select(table=True) 3737 if subquery: 3738 if not subquery.args.get("pivots"): 3739 subquery.set("pivots", self._parse_pivots()) 3740 return subquery 3741 3742 bracket = parse_bracket and self._parse_bracket(None) 3743 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3744 3745 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3746 self._parse_table 3747 ) 3748 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3749 3750 only = self._match(TokenType.ONLY) 3751 3752 this = t.cast( 3753 exp.Expression, 3754 bracket 3755 or rows_from 3756 or self._parse_bracket( 3757 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3758 ), 3759 ) 3760 3761 if only: 3762 this.set("only", only) 3763 3764 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3765 self._match_text_seq("*") 3766 3767 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3768 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3769 this.set("partition", self._parse_partition()) 3770 3771 if schema: 3772 return self._parse_schema(this=this) 3773 3774 version = self._parse_version() 3775 3776 if version: 3777 this.set("version", version) 3778 3779 if self.dialect.ALIAS_POST_TABLESAMPLE: 3780 this.set("sample", self._parse_table_sample()) 3781 3782 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3783 if alias: 3784 this.set("alias", alias) 3785 3786 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3787 return self.expression( 3788 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3789 ) 3790 3791 this.set("hints", self._parse_table_hints()) 3792 3793 if not this.args.get("pivots"): 3794 this.set("pivots", self._parse_pivots()) 3795 3796 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3797 this.set("sample", self._parse_table_sample()) 3798 3799 if joins: 3800 for join in self._parse_joins(): 3801 this.append("joins", join) 3802 3803 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3804 this.set("ordinality", True) 3805 this.set("alias", self._parse_table_alias()) 3806 3807 return this 3808 3809 def _parse_version(self) -> t.Optional[exp.Version]: 3810 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3811 this = "TIMESTAMP" 3812 elif self._match(TokenType.VERSION_SNAPSHOT): 3813 this = "VERSION" 3814 else: 3815 return None 3816 3817 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3818 kind = self._prev.text.upper() 3819 start = self._parse_bitwise() 3820 self._match_texts(("TO", "AND")) 3821 end = self._parse_bitwise() 3822 expression: t.Optional[exp.Expression] = self.expression( 3823 exp.Tuple, expressions=[start, end] 3824 ) 3825 elif self._match_text_seq("CONTAINED", "IN"): 3826 kind = "CONTAINED IN" 3827 expression = self.expression( 3828 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3829 ) 3830 elif self._match(TokenType.ALL): 3831 kind = "ALL" 3832 expression = None 3833 else: 3834 self._match_text_seq("AS", "OF") 3835 kind = "AS OF" 3836 expression = self._parse_type() 3837 3838 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3839 3840 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3841 # 
https://docs.snowflake.com/en/sql-reference/constructs/at-before 3842 index = self._index 3843 historical_data = None 3844 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3845 this = self._prev.text.upper() 3846 kind = ( 3847 self._match(TokenType.L_PAREN) 3848 and self._match_texts(self.HISTORICAL_DATA_KIND) 3849 and self._prev.text.upper() 3850 ) 3851 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3852 3853 if expression: 3854 self._match_r_paren() 3855 historical_data = self.expression( 3856 exp.HistoricalData, this=this, kind=kind, expression=expression 3857 ) 3858 else: 3859 self._retreat(index) 3860 3861 return historical_data 3862 3863 def _parse_changes(self) -> t.Optional[exp.Changes]: 3864 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 3865 return None 3866 3867 information = self._parse_var(any_token=True) 3868 self._match_r_paren() 3869 3870 return self.expression( 3871 exp.Changes, 3872 information=information, 3873 at_before=self._parse_historical_data(), 3874 end=self._parse_historical_data(), 3875 ) 3876 3877 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3878 if not self._match(TokenType.UNNEST): 3879 return None 3880 3881 expressions = self._parse_wrapped_csv(self._parse_equality) 3882 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3883 3884 alias = self._parse_table_alias() if with_alias else None 3885 3886 if alias: 3887 if self.dialect.UNNEST_COLUMN_ONLY: 3888 if alias.args.get("columns"): 3889 self.raise_error("Unexpected extra column alias in unnest.") 3890 3891 alias.set("columns", [alias.this]) 3892 alias.set("this", None) 3893 3894 columns = alias.args.get("columns") or [] 3895 if offset and len(expressions) < len(columns): 3896 offset = columns.pop() 3897 3898 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3899 self._match(TokenType.ALIAS) 3900 offset = self._parse_id_var( 3901 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3902 ) or exp.to_identifier("offset") 3903 3904 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3905 3906 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3907 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3908 if not is_derived and not ( 3909 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 3910 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 3911 ): 3912 return None 3913 3914 expressions = self._parse_csv(self._parse_value) 3915 alias = self._parse_table_alias() 3916 3917 if is_derived: 3918 self._match_r_paren() 3919 3920 return self.expression( 3921 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3922 ) 3923 3924 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3925 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3926 as_modifier and self._match_text_seq("USING", "SAMPLE") 3927 ): 3928 return None 3929 3930 bucket_numerator = None 3931 bucket_denominator = None 3932 bucket_field = None 3933 percent = None 3934 size = None 3935 seed = None 3936 3937 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3938 matched_l_paren = self._match(TokenType.L_PAREN) 3939 3940 if self.TABLESAMPLE_CSV: 3941 num = None 3942 expressions = self._parse_csv(self._parse_primary) 3943 else: 3944 expressions = None 3945 num = ( 3946 self._parse_factor() 3947 if self._match(TokenType.NUMBER, advance=False) 3948 else self._parse_primary() or 
self._parse_placeholder() 3949 ) 3950 3951 if self._match_text_seq("BUCKET"): 3952 bucket_numerator = self._parse_number() 3953 self._match_text_seq("OUT", "OF") 3954 bucket_denominator = self._parse_number() 3955 self._match(TokenType.ON) 3956 bucket_field = self._parse_field() 3957 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3958 percent = num 3959 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3960 size = num 3961 else: 3962 percent = num 3963 3964 if matched_l_paren: 3965 self._match_r_paren() 3966 3967 if self._match(TokenType.L_PAREN): 3968 method = self._parse_var(upper=True) 3969 seed = self._match(TokenType.COMMA) and self._parse_number() 3970 self._match_r_paren() 3971 elif self._match_texts(("SEED", "REPEATABLE")): 3972 seed = self._parse_wrapped(self._parse_number) 3973 3974 if not method and self.DEFAULT_SAMPLING_METHOD: 3975 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3976 3977 return self.expression( 3978 exp.TableSample, 3979 expressions=expressions, 3980 method=method, 3981 bucket_numerator=bucket_numerator, 3982 bucket_denominator=bucket_denominator, 3983 bucket_field=bucket_field, 3984 percent=percent, 3985 size=size, 3986 seed=seed, 3987 ) 3988 3989 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3990 return list(iter(self._parse_pivot, None)) or None 3991 3992 def _parse_joins(self) -> t.Iterator[exp.Join]: 3993 return iter(self._parse_join, None) 3994 3995 # https://duckdb.org/docs/sql/statements/pivot 3996 def _parse_simplified_pivot(self) -> exp.Pivot: 3997 def _parse_on() -> t.Optional[exp.Expression]: 3998 this = self._parse_bitwise() 3999 return self._parse_in(this) if self._match(TokenType.IN) else this 4000 4001 this = self._parse_table() 4002 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4003 using = self._match(TokenType.USING) and self._parse_csv( 4004 lambda: self._parse_alias(self._parse_function()) 4005 ) 4006 group = self._parse_group() 4007 return self.expression( 4008 exp.Pivot, this=this, expressions=expressions, using=using, group=group 4009 ) 4010 4011 def _parse_pivot_in(self) -> exp.In | exp.PivotAny: 4012 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4013 this = self._parse_select_or_expression() 4014 4015 self._match(TokenType.ALIAS) 4016 alias = self._parse_bitwise() 4017 if alias: 4018 if isinstance(alias, exp.Column) and not alias.db: 4019 alias = alias.this 4020 return self.expression(exp.PivotAlias, this=this, alias=alias) 4021 4022 return this 4023 4024 value = self._parse_column() 4025 4026 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4027 self.raise_error("Expecting IN (") 4028 4029 if self._match(TokenType.ANY): 4030 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4031 else: 4032 exprs = self._parse_csv(_parse_aliased_expression) 4033 4034 self._match_r_paren() 4035 return self.expression(exp.In, this=value, expressions=exprs) 4036 4037 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4038 index = self._index 4039 include_nulls = None 4040 4041 if self._match(TokenType.PIVOT): 4042 unpivot = False 4043 elif self._match(TokenType.UNPIVOT): 4044 unpivot = True 4045 4046 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4047 if self._match_text_seq("INCLUDE", "NULLS"): 4048 include_nulls = True 4049 elif self._match_text_seq("EXCLUDE", "NULLS"): 4050 include_nulls = False 4051 else: 4052 return None 4053 4054 expressions
= [] 4055 4056 if not self._match(TokenType.L_PAREN): 4057 self._retreat(index) 4058 return None 4059 4060 if unpivot: 4061 expressions = self._parse_csv(self._parse_column) 4062 else: 4063 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 4064 4065 if not expressions: 4066 self.raise_error("Failed to parse PIVOT's aggregation list") 4067 4068 if not self._match(TokenType.FOR): 4069 self.raise_error("Expecting FOR") 4070 4071 field = self._parse_pivot_in() 4072 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4073 self._parse_bitwise 4074 ) 4075 4076 self._match_r_paren() 4077 4078 pivot = self.expression( 4079 exp.Pivot, 4080 expressions=expressions, 4081 field=field, 4082 unpivot=unpivot, 4083 include_nulls=include_nulls, 4084 default_on_null=default_on_null, 4085 ) 4086 4087 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4088 pivot.set("alias", self._parse_table_alias()) 4089 4090 if not unpivot: 4091 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4092 4093 columns: t.List[exp.Expression] = [] 4094 for fld in pivot.args["field"].expressions: 4095 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4096 for name in names: 4097 if self.PREFIXED_PIVOT_COLUMNS: 4098 name = f"{name}_{field_name}" if name else field_name 4099 else: 4100 name = f"{field_name}_{name}" if name else field_name 4101 4102 columns.append(exp.to_identifier(name)) 4103 4104 pivot.set("columns", columns) 4105 4106 return pivot 4107 4108 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4109 return [agg.alias for agg in aggregations] 4110 4111 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4112 if not skip_where_token and not self._match(TokenType.PREWHERE): 4113 return None 4114 4115 return self.expression( 4116 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4117 ) 4118 4119 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4120 if not skip_where_token and not self._match(TokenType.WHERE): 4121 return None 4122 4123 return self.expression( 4124 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4125 ) 4126 4127 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4128 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4129 return None 4130 4131 elements: t.Dict[str, t.Any] = defaultdict(list) 4132 4133 if self._match(TokenType.ALL): 4134 elements["all"] = True 4135 elif self._match(TokenType.DISTINCT): 4136 elements["all"] = False 4137 4138 while True: 4139 index = self._index 4140 4141 elements["expressions"].extend( 4142 self._parse_csv( 4143 lambda: None 4144 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4145 else self._parse_assignment() 4146 ) 4147 ) 4148 4149 before_with_index = self._index 4150 with_prefix = self._match(TokenType.WITH) 4151 4152 if self._match(TokenType.ROLLUP): 4153 elements["rollup"].append( 4154 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4155 ) 4156 elif self._match(TokenType.CUBE): 4157 elements["cube"].append( 4158 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4159 ) 4160 elif self._match(TokenType.GROUPING_SETS): 4161 elements["grouping_sets"].append( 4162 self.expression( 4163 exp.GroupingSets, 4164 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4165 ) 4166 ) 
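            # Hedged example (assumed input) of what one pass of this loop accepts: plain
            # keys and grouping constructs may be mixed, as in
            #   GROUP BY a, ROLLUP (b, c), GROUPING SETS ((a), (a, b))
            # with each construct accumulated into its own bucket of `elements`.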
4167 elif self._match_text_seq("TOTALS"): 4168 elements["totals"] = True # type: ignore 4169 4170 if before_with_index <= self._index <= before_with_index + 1: 4171 self._retreat(before_with_index) 4172 break 4173 4174 if index == self._index: 4175 break 4176 4177 return self.expression(exp.Group, **elements) # type: ignore 4178 4179 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4180 return self.expression( 4181 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4182 ) 4183 4184 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4185 if self._match(TokenType.L_PAREN): 4186 grouping_set = self._parse_csv(self._parse_column) 4187 self._match_r_paren() 4188 return self.expression(exp.Tuple, expressions=grouping_set) 4189 4190 return self._parse_column() 4191 4192 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4193 if not skip_having_token and not self._match(TokenType.HAVING): 4194 return None 4195 return self.expression(exp.Having, this=self._parse_assignment()) 4196 4197 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4198 if not self._match(TokenType.QUALIFY): 4199 return None 4200 return self.expression(exp.Qualify, this=self._parse_assignment()) 4201 4202 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4203 if skip_start_token: 4204 start = None 4205 elif self._match(TokenType.START_WITH): 4206 start = self._parse_assignment() 4207 else: 4208 return None 4209 4210 self._match(TokenType.CONNECT_BY) 4211 nocycle = self._match_text_seq("NOCYCLE") 4212 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4213 exp.Prior, this=self._parse_bitwise() 4214 ) 4215 connect = self._parse_assignment() 4216 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4217 4218 if not start and self._match(TokenType.START_WITH): 4219 start = self._parse_assignment() 4220 4221 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4222 4223 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4224 this = self._parse_id_var(any_token=True) 4225 if self._match(TokenType.ALIAS): 4226 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4227 return this 4228 4229 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4230 if self._match_text_seq("INTERPOLATE"): 4231 return self._parse_wrapped_csv(self._parse_name_as_expression) 4232 return None 4233 4234 def _parse_order( 4235 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4236 ) -> t.Optional[exp.Expression]: 4237 siblings = None 4238 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4239 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4240 return this 4241 4242 siblings = True 4243 4244 return self.expression( 4245 exp.Order, 4246 this=this, 4247 expressions=self._parse_csv(self._parse_ordered), 4248 siblings=siblings, 4249 ) 4250 4251 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4252 if not self._match(token): 4253 return None 4254 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4255 4256 def _parse_ordered( 4257 self, parse_method: t.Optional[t.Callable] = None 4258 ) -> t.Optional[exp.Ordered]: 4259 this = parse_method() if parse_method else self._parse_assignment() 4260 if not this: 4261 return None 4262 4263 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4264 this = 
exp.var("ALL") 4265 4266 asc = self._match(TokenType.ASC) 4267 desc = self._match(TokenType.DESC) or (asc and False) 4268 4269 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4270 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4271 4272 nulls_first = is_nulls_first or False 4273 explicitly_null_ordered = is_nulls_first or is_nulls_last 4274 4275 if ( 4276 not explicitly_null_ordered 4277 and ( 4278 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4279 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4280 ) 4281 and self.dialect.NULL_ORDERING != "nulls_are_last" 4282 ): 4283 nulls_first = True 4284 4285 if self._match_text_seq("WITH", "FILL"): 4286 with_fill = self.expression( 4287 exp.WithFill, 4288 **{ # type: ignore 4289 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4290 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4291 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4292 "interpolate": self._parse_interpolate(), 4293 }, 4294 ) 4295 else: 4296 with_fill = None 4297 4298 return self.expression( 4299 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4300 ) 4301 4302 def _parse_limit( 4303 self, 4304 this: t.Optional[exp.Expression] = None, 4305 top: bool = False, 4306 skip_limit_token: bool = False, 4307 ) -> t.Optional[exp.Expression]: 4308 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4309 comments = self._prev_comments 4310 if top: 4311 limit_paren = self._match(TokenType.L_PAREN) 4312 expression = self._parse_term() if limit_paren else self._parse_number() 4313 4314 if limit_paren: 4315 self._match_r_paren() 4316 else: 4317 expression = self._parse_term() 4318 4319 if self._match(TokenType.COMMA): 4320 offset = expression 4321 expression = self._parse_term() 4322 else: 4323 offset = None 4324 4325 limit_exp = self.expression( 4326 exp.Limit, 4327 this=this, 4328 expression=expression, 4329 offset=offset, 4330 comments=comments, 4331 expressions=self._parse_limit_by(), 4332 ) 4333 4334 return limit_exp 4335 4336 if self._match(TokenType.FETCH): 4337 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4338 direction = self._prev.text.upper() if direction else "FIRST" 4339 4340 count = self._parse_field(tokens=self.FETCH_TOKENS) 4341 percent = self._match(TokenType.PERCENT) 4342 4343 self._match_set((TokenType.ROW, TokenType.ROWS)) 4344 4345 only = self._match_text_seq("ONLY") 4346 with_ties = self._match_text_seq("WITH", "TIES") 4347 4348 if only and with_ties: 4349 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 4350 4351 return self.expression( 4352 exp.Fetch, 4353 direction=direction, 4354 count=count, 4355 percent=percent, 4356 with_ties=with_ties, 4357 ) 4358 4359 return this 4360 4361 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4362 if not self._match(TokenType.OFFSET): 4363 return this 4364 4365 count = self._parse_term() 4366 self._match_set((TokenType.ROW, TokenType.ROWS)) 4367 4368 return self.expression( 4369 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4370 ) 4371 4372 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4373 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4374 4375 def _parse_locks(self) -> t.List[exp.Lock]: 4376 locks = [] 4377 while True: 4378 if self._match_text_seq("FOR", "UPDATE"): 4379 update = True 4380 elif self._match_text_seq("FOR", "SHARE") 
or self._match_text_seq( 4381 "LOCK", "IN", "SHARE", "MODE" 4382 ): 4383 update = False 4384 else: 4385 break 4386 4387 expressions = None 4388 if self._match_text_seq("OF"): 4389 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4390 4391 wait: t.Optional[bool | exp.Expression] = None 4392 if self._match_text_seq("NOWAIT"): 4393 wait = True 4394 elif self._match_text_seq("WAIT"): 4395 wait = self._parse_primary() 4396 elif self._match_text_seq("SKIP", "LOCKED"): 4397 wait = False 4398 4399 locks.append( 4400 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4401 ) 4402 4403 return locks 4404 4405 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4406 while this and self._match_set(self.SET_OPERATIONS): 4407 token_type = self._prev.token_type 4408 4409 if token_type == TokenType.UNION: 4410 operation: t.Type[exp.SetOperation] = exp.Union 4411 elif token_type == TokenType.EXCEPT: 4412 operation = exp.Except 4413 else: 4414 operation = exp.Intersect 4415 4416 comments = self._prev.comments 4417 4418 if self._match(TokenType.DISTINCT): 4419 distinct: t.Optional[bool] = True 4420 elif self._match(TokenType.ALL): 4421 distinct = False 4422 else: 4423 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4424 if distinct is None: 4425 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4426 4427 by_name = self._match_text_seq("BY", "NAME") 4428 expression = self._parse_select(nested=True, parse_set_operation=False) 4429 4430 this = self.expression( 4431 operation, 4432 comments=comments, 4433 this=this, 4434 distinct=distinct, 4435 by_name=by_name, 4436 expression=expression, 4437 ) 4438 4439 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4440 expression = this.expression 4441 4442 if expression: 4443 for arg in self.SET_OP_MODIFIERS: 4444 expr = expression.args.get(arg) 4445 if expr: 4446 this.set(arg, expr.pop()) 4447 4448 return this 4449 4450 def _parse_expression(self) -> t.Optional[exp.Expression]: 4451 return self._parse_alias(self._parse_assignment()) 4452 4453 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4454 this = self._parse_disjunction() 4455 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4456 # This allows us to parse <non-identifier token> := <expr> 4457 this = exp.column( 4458 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4459 ) 4460 4461 while self._match_set(self.ASSIGNMENT): 4462 if isinstance(this, exp.Column) and len(this.parts) == 1: 4463 this = this.this 4464 4465 this = self.expression( 4466 self.ASSIGNMENT[self._prev.token_type], 4467 this=this, 4468 comments=self._prev_comments, 4469 expression=self._parse_assignment(), 4470 ) 4471 4472 return this 4473 4474 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4475 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4476 4477 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4478 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4479 4480 def _parse_equality(self) -> t.Optional[exp.Expression]: 4481 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4482 4483 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4484 return self._parse_tokens(self._parse_range, self.COMPARISON) 4485 4486 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4487 this = this or self._parse_bitwise() 4488 negate = 
self._match(TokenType.NOT) 4489 4490 if self._match_set(self.RANGE_PARSERS): 4491 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4492 if not expression: 4493 return this 4494 4495 this = expression 4496 elif self._match(TokenType.ISNULL): 4497 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4498 4499 # Postgres supports ISNULL and NOTNULL for conditions. 4500 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4501 if self._match(TokenType.NOTNULL): 4502 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4503 this = self.expression(exp.Not, this=this) 4504 4505 if negate: 4506 this = self._negate_range(this) 4507 4508 if self._match(TokenType.IS): 4509 this = self._parse_is(this) 4510 4511 return this 4512 4513 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4514 if not this: 4515 return this 4516 4517 return self.expression(exp.Not, this=this) 4518 4519 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4520 index = self._index - 1 4521 negate = self._match(TokenType.NOT) 4522 4523 if self._match_text_seq("DISTINCT", "FROM"): 4524 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4525 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4526 4527 if self._match(TokenType.JSON): 4528 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4529 4530 if self._match_text_seq("WITH"): 4531 _with = True 4532 elif self._match_text_seq("WITHOUT"): 4533 _with = False 4534 else: 4535 _with = None 4536 4537 unique = self._match(TokenType.UNIQUE) 4538 self._match_text_seq("KEYS") 4539 expression: t.Optional[exp.Expression] = self.expression( 4540 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4541 ) 4542 else: 4543 expression = self._parse_primary() or self._parse_null() 4544 if not expression: 4545 self._retreat(index) 4546 return None 4547 4548 this = self.expression(exp.Is, this=this, expression=expression) 4549 return self.expression(exp.Not, this=this) if negate else this 4550 4551 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4552 unnest = self._parse_unnest(with_alias=False) 4553 if unnest: 4554 this = self.expression(exp.In, this=this, unnest=unnest) 4555 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4556 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4557 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4558 4559 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4560 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4561 else: 4562 this = self.expression(exp.In, this=this, expressions=expressions) 4563 4564 if matched_l_paren: 4565 self._match_r_paren(this) 4566 elif not self._match(TokenType.R_BRACKET, expression=this): 4567 self.raise_error("Expecting ]") 4568 else: 4569 this = self.expression(exp.In, this=this, field=self._parse_column()) 4570 4571 return this 4572 4573 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4574 low = self._parse_bitwise() 4575 self._match(TokenType.AND) 4576 high = self._parse_bitwise() 4577 return self.expression(exp.Between, this=this, low=low, high=high) 4578 4579 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4580 if not self._match(TokenType.ESCAPE): 4581 return this 4582 return self.expression(exp.Escape, this=this, 
expression=self._parse_string()) 4583 4584 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4585 index = self._index 4586 4587 if not self._match(TokenType.INTERVAL) and match_interval: 4588 return None 4589 4590 if self._match(TokenType.STRING, advance=False): 4591 this = self._parse_primary() 4592 else: 4593 this = self._parse_term() 4594 4595 if not this or ( 4596 isinstance(this, exp.Column) 4597 and not this.table 4598 and not this.this.quoted 4599 and this.name.upper() == "IS" 4600 ): 4601 self._retreat(index) 4602 return None 4603 4604 unit = self._parse_function() or ( 4605 not self._match(TokenType.ALIAS, advance=False) 4606 and self._parse_var(any_token=True, upper=True) 4607 ) 4608 4609 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4610 # each INTERVAL expression into this canonical form so it's easy to transpile 4611 if this and this.is_number: 4612 this = exp.Literal.string(this.to_py()) 4613 elif this and this.is_string: 4614 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4615 if len(parts) == 1: 4616 if unit: 4617 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4618 self._retreat(self._index - 1) 4619 4620 this = exp.Literal.string(parts[0][0]) 4621 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4622 4623 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4624 unit = self.expression( 4625 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4626 ) 4627 4628 interval = self.expression(exp.Interval, this=this, unit=unit) 4629 4630 index = self._index 4631 self._match(TokenType.PLUS) 4632 4633 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4634 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4635 return self.expression( 4636 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4637 ) 4638 4639 self._retreat(index) 4640 return interval 4641 4642 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4643 this = self._parse_term() 4644 4645 while True: 4646 if self._match_set(self.BITWISE): 4647 this = self.expression( 4648 self.BITWISE[self._prev.token_type], 4649 this=this, 4650 expression=self._parse_term(), 4651 ) 4652 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4653 this = self.expression( 4654 exp.DPipe, 4655 this=this, 4656 expression=self._parse_term(), 4657 safe=not self.dialect.STRICT_STRING_CONCAT, 4658 ) 4659 elif self._match(TokenType.DQMARK): 4660 this = self.expression( 4661 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4662 ) 4663 elif self._match_pair(TokenType.LT, TokenType.LT): 4664 this = self.expression( 4665 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4666 ) 4667 elif self._match_pair(TokenType.GT, TokenType.GT): 4668 this = self.expression( 4669 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4670 ) 4671 else: 4672 break 4673 4674 return this 4675 4676 def _parse_term(self) -> t.Optional[exp.Expression]: 4677 this = self._parse_factor() 4678 4679 while self._match_set(self.TERM): 4680 klass = self.TERM[self._prev.token_type] 4681 comments = self._prev_comments 4682 expression = self._parse_factor() 4683 4684 this = self.expression(klass, this=this, comments=comments, expression=expression) 4685 4686 if isinstance(this, exp.Collate): 4687 expr = this.expression 4688 4689 # Preserve collations such as pg_catalog."default" 
(Postgres) as columns, otherwise 4690 # fallback to Identifier / Var 4691 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4692 ident = expr.this 4693 if isinstance(ident, exp.Identifier): 4694 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4695 4696 return this 4697 4698 def _parse_factor(self) -> t.Optional[exp.Expression]: 4699 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4700 this = parse_method() 4701 4702 while self._match_set(self.FACTOR): 4703 klass = self.FACTOR[self._prev.token_type] 4704 comments = self._prev_comments 4705 expression = parse_method() 4706 4707 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4708 self._retreat(self._index - 1) 4709 return this 4710 4711 this = self.expression(klass, this=this, comments=comments, expression=expression) 4712 4713 if isinstance(this, exp.Div): 4714 this.args["typed"] = self.dialect.TYPED_DIVISION 4715 this.args["safe"] = self.dialect.SAFE_DIVISION 4716 4717 return this 4718 4719 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4720 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4721 4722 def _parse_unary(self) -> t.Optional[exp.Expression]: 4723 if self._match_set(self.UNARY_PARSERS): 4724 return self.UNARY_PARSERS[self._prev.token_type](self) 4725 return self._parse_at_time_zone(self._parse_type()) 4726 4727 def _parse_type( 4728 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4729 ) -> t.Optional[exp.Expression]: 4730 interval = parse_interval and self._parse_interval() 4731 if interval: 4732 return interval 4733 4734 index = self._index 4735 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4736 4737 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 4738 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 4739 if isinstance(data_type, exp.Cast): 4740 # This constructor can contain ops directly after it, for instance struct unnesting: 4741 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 4742 return self._parse_column_ops(data_type) 4743 4744 if data_type: 4745 index2 = self._index 4746 this = self._parse_primary() 4747 4748 if isinstance(this, exp.Literal): 4749 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4750 if parser: 4751 return parser(self, this, data_type) 4752 4753 return self.expression(exp.Cast, this=this, to=data_type) 4754 4755 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4756 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4757 # 4758 # If the index difference here is greater than 1, that means the parser itself must have 4759 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4760 # 4761 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4762 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4763 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4764 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 4765 # 4766 # In these cases, we don't really want to return the converted type, but instead retreat 4767 # and try to parse a Column or Identifier in the section below.
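            # Hedged illustration (assumed inputs): an explicit DECIMAL(38, 0) consumes the
            # precision/scale tokens, so index2 - index > 1 below and the DataType is kept;
            # a bare DECIMAL whose expressions were injected by a TYPE_CONVERTERS callable
            # leaves the difference at 1, so we retreat and try a Column/Identifier instead.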
4768 if data_type.expressions and index2 - index > 1: 4769 self._retreat(index2) 4770 return self._parse_column_ops(data_type) 4771 4772 self._retreat(index) 4773 4774 if fallback_to_identifier: 4775 return self._parse_id_var() 4776 4777 this = self._parse_column() 4778 return this and self._parse_column_ops(this) 4779 4780 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4781 this = self._parse_type() 4782 if not this: 4783 return None 4784 4785 if isinstance(this, exp.Column) and not this.table: 4786 this = exp.var(this.name.upper()) 4787 4788 return self.expression( 4789 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4790 ) 4791 4792 def _parse_types( 4793 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4794 ) -> t.Optional[exp.Expression]: 4795 index = self._index 4796 4797 this: t.Optional[exp.Expression] = None 4798 prefix = self._match_text_seq("SYSUDTLIB", ".") 4799 4800 if not self._match_set(self.TYPE_TOKENS): 4801 identifier = allow_identifiers and self._parse_id_var( 4802 any_token=False, tokens=(TokenType.VAR,) 4803 ) 4804 if isinstance(identifier, exp.Identifier): 4805 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4806 4807 if len(tokens) != 1: 4808 self.raise_error("Unexpected identifier", self._prev) 4809 4810 if tokens[0].token_type in self.TYPE_TOKENS: 4811 self._prev = tokens[0] 4812 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4813 type_name = identifier.name 4814 4815 while self._match(TokenType.DOT): 4816 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4817 4818 this = exp.DataType.build(type_name, udt=True) 4819 else: 4820 self._retreat(self._index - 1) 4821 return None 4822 else: 4823 return None 4824 4825 type_token = self._prev.token_type 4826 4827 if type_token == TokenType.PSEUDO_TYPE: 4828 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4829 4830 if type_token == TokenType.OBJECT_IDENTIFIER: 4831 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4832 4833 # https://materialize.com/docs/sql/types/map/ 4834 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4835 key_type = self._parse_types( 4836 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4837 ) 4838 if not self._match(TokenType.FARROW): 4839 self._retreat(index) 4840 return None 4841 4842 value_type = self._parse_types( 4843 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4844 ) 4845 if not self._match(TokenType.R_BRACKET): 4846 self._retreat(index) 4847 return None 4848 4849 return exp.DataType( 4850 this=exp.DataType.Type.MAP, 4851 expressions=[key_type, value_type], 4852 nested=True, 4853 prefix=prefix, 4854 ) 4855 4856 nested = type_token in self.NESTED_TYPE_TOKENS 4857 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4858 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4859 expressions = None 4860 maybe_func = False 4861 4862 if self._match(TokenType.L_PAREN): 4863 if is_struct: 4864 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4865 elif nested: 4866 expressions = self._parse_csv( 4867 lambda: self._parse_types( 4868 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4869 ) 4870 ) 4871 if type_token == TokenType.NULLABLE and len(expressions) == 1: 4872 this = expressions[0] 4873 this.set("nullable", True) 4874 self._match_r_paren() 4875 return this 4876 elif type_token in self.ENUM_TYPE_TOKENS: 4877 
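                # Descriptive note (assumed example): e.g. MySQL's ENUM('sm', 'md', 'lg'),
                # whose members are parsed at equality precedence via _parse_equality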
expressions = self._parse_csv(self._parse_equality) 4878 elif is_aggregate: 4879 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4880 any_token=False, tokens=(TokenType.VAR,) 4881 ) 4882 if not func_or_ident or not self._match(TokenType.COMMA): 4883 return None 4884 expressions = self._parse_csv( 4885 lambda: self._parse_types( 4886 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4887 ) 4888 ) 4889 expressions.insert(0, func_or_ident) 4890 else: 4891 expressions = self._parse_csv(self._parse_type_size) 4892 4893 # https://docs.snowflake.com/en/sql-reference/data-types-vector 4894 if type_token == TokenType.VECTOR and len(expressions) == 2: 4895 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 4896 4897 if not expressions or not self._match(TokenType.R_PAREN): 4898 self._retreat(index) 4899 return None 4900 4901 maybe_func = True 4902 4903 values: t.Optional[t.List[exp.Expression]] = None 4904 4905 if nested and self._match(TokenType.LT): 4906 if is_struct: 4907 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4908 else: 4909 expressions = self._parse_csv( 4910 lambda: self._parse_types( 4911 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4912 ) 4913 ) 4914 4915 if not self._match(TokenType.GT): 4916 self.raise_error("Expecting >") 4917 4918 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4919 values = self._parse_csv(self._parse_assignment) 4920 if not values and is_struct: 4921 values = None 4922 self._retreat(self._index - 1) 4923 else: 4924 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4925 4926 if type_token in self.TIMESTAMPS: 4927 if self._match_text_seq("WITH", "TIME", "ZONE"): 4928 maybe_func = False 4929 tz_type = ( 4930 exp.DataType.Type.TIMETZ 4931 if type_token in self.TIMES 4932 else exp.DataType.Type.TIMESTAMPTZ 4933 ) 4934 this = exp.DataType(this=tz_type, expressions=expressions) 4935 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4936 maybe_func = False 4937 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4938 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4939 maybe_func = False 4940 elif type_token == TokenType.INTERVAL: 4941 unit = self._parse_var(upper=True) 4942 if unit: 4943 if self._match_text_seq("TO"): 4944 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4945 4946 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4947 else: 4948 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4949 4950 if maybe_func and check_func: 4951 index2 = self._index 4952 peek = self._parse_string() 4953 4954 if not peek: 4955 self._retreat(index) 4956 return None 4957 4958 self._retreat(index2) 4959 4960 if not this: 4961 if self._match_text_seq("UNSIGNED"): 4962 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4963 if not unsigned_type_token: 4964 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4965 4966 type_token = unsigned_type_token or type_token 4967 4968 this = exp.DataType( 4969 this=exp.DataType.Type[type_token.value], 4970 expressions=expressions, 4971 nested=nested, 4972 prefix=prefix, 4973 ) 4974 4975 # Empty arrays/structs are allowed 4976 if values is not None: 4977 cls = exp.Struct if is_struct else exp.Array 4978 this = exp.cast(cls(expressions=values), this, copy=False) 4979 4980 elif expressions: 4981 this.set("expressions", 
expressions) 4982 4983 # https://materialize.com/docs/sql/types/list/#type-name 4984 while self._match(TokenType.LIST): 4985 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4986 4987 index = self._index 4988 4989 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4990 matched_array = self._match(TokenType.ARRAY) 4991 4992 while self._curr: 4993 datatype_token = self._prev.token_type 4994 matched_l_bracket = self._match(TokenType.L_BRACKET) 4995 if not matched_l_bracket and not matched_array: 4996 break 4997 4998 matched_array = False 4999 values = self._parse_csv(self._parse_assignment) or None 5000 if ( 5001 values 5002 and not schema 5003 and ( 5004 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5005 ) 5006 ): 5007 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5008 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5009 self._retreat(index) 5010 break 5011 5012 this = exp.DataType( 5013 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5014 ) 5015 self._match(TokenType.R_BRACKET) 5016 5017 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5018 converter = self.TYPE_CONVERTERS.get(this.this) 5019 if converter: 5020 this = converter(t.cast(exp.DataType, this)) 5021 5022 return this 5023 5024 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5025 index = self._index 5026 5027 if ( 5028 self._curr 5029 and self._next 5030 and self._curr.token_type in self.TYPE_TOKENS 5031 and self._next.token_type in self.TYPE_TOKENS 5032 ): 5033 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5034 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5035 this = self._parse_id_var() 5036 else: 5037 this = ( 5038 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5039 or self._parse_id_var() 5040 ) 5041 5042 self._match(TokenType.COLON) 5043 5044 if ( 5045 type_required 5046 and not isinstance(this, exp.DataType) 5047 and not self._match_set(self.TYPE_TOKENS, advance=False) 5048 ): 5049 self._retreat(index) 5050 return self._parse_types() 5051 5052 return self._parse_column_def(this) 5053 5054 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5055 if not self._match_text_seq("AT", "TIME", "ZONE"): 5056 return this 5057 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5058 5059 def _parse_column(self) -> t.Optional[exp.Expression]: 5060 this = self._parse_column_reference() 5061 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5062 5063 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5064 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5065 5066 return column 5067 5068 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5069 this = self._parse_field() 5070 if ( 5071 not this 5072 and self._match(TokenType.VALUES, advance=False) 5073 and self.VALUES_FOLLOWED_BY_PAREN 5074 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5075 ): 5076 this = self._parse_id_var() 5077 5078 if isinstance(this, exp.Identifier): 5079 # We bubble up comments from the Identifier to the Column 5080 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5081 5082 return this 5083 5084 def _parse_colon_as_variant_extract( 5085 self, this: t.Optional[exp.Expression] 5086 ) -> t.Optional[exp.Expression]: 5087 casts = [] 5088 json_path = [] 5089 escape = None 5090 5091 while self._match(TokenType.COLON): 5092 start_index = self._index 5093 5094 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5095 path = self._parse_column_ops( 5096 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5097 ) 5098 5099 # The cast :: operator has a lower precedence than the extraction operator :, so 5100 # we rearrange the AST appropriately to avoid casting the JSON path 5101 while isinstance(path, exp.Cast): 5102 casts.append(path.to) 5103 path = path.this 5104 5105 if casts: 5106 dcolon_offset = next( 5107 i 5108 for i, t in enumerate(self._tokens[start_index:]) 5109 if t.token_type == TokenType.DCOLON 5110 ) 5111 end_token = self._tokens[start_index + dcolon_offset - 1] 5112 else: 5113 end_token = self._prev 5114 5115 if path: 5116 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5117 # it'll roundtrip to a string literal in GET_PATH 5118 if isinstance(path, exp.Identifier) and path.quoted: 5119 escape = True 5120 5121 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5122 5123 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5124 # Databricks transforms it back to the colon/dot notation 5125 if json_path: 5126 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5127 5128 if json_path_expr: 5129 json_path_expr.set("escape", escape) 5130 5131 this = self.expression( 5132 exp.JSONExtract, 5133 this=this, 5134 expression=json_path_expr, 5135 variant_extract=True, 5136 ) 5137 5138 while casts: 5139 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5140 5141 return this 5142 5143 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5144 return self._parse_types() 5145 5146 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5147 this = self._parse_bracket(this) 5148 5149 while self._match_set(self.COLUMN_OPERATORS): 5150 op_token = self._prev.token_type 5151 op = self.COLUMN_OPERATORS.get(op_token) 5152 5153 if op_token == TokenType.DCOLON: 5154 field = self._parse_dcolon() 5155 if not field: 5156 self.raise_error("Expected type") 5157 elif op and self._curr: 5158 field = self._parse_column_reference() or self._parse_bracket() 5159 else: 5160 field = self._parse_field(any_token=True, anonymous_func=True) 5161 5162 if isinstance(field, (exp.Func, exp.Window)) and this: 5163 # BQ & snowflake allow function calls like x.y.count(...), SAFE.SUBSTR(...) etc 5164 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5165 this = exp.replace_tree( 5166 this, 5167 lambda n: ( 5168 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5169 if n.table 5170 else n.this 5171 ) 5172 if isinstance(n, exp.Column) 5173 else n, 5174 ) 5175 5176 if op: 5177 this = op(self, this, field) 5178 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5179 this = self.expression( 5180 exp.Column, 5181 comments=this.comments, 5182 this=field, 5183 table=this.this, 5184 db=this.args.get("table"), 5185 catalog=this.args.get("db"), 5186 ) 5187 elif isinstance(field, exp.Window): 5188 # Move the exp.Dot's to the window's function 5189 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5190 field.set("this", window_func) 5191 this = field 5192 else: 5193 this = self.expression(exp.Dot, this=this, expression=field) 5194 5195 if field and field.comments: 5196 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5197 5198 this = self._parse_bracket(this) 5199 5200 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5201 5202 def _parse_primary(self) -> t.Optional[exp.Expression]: 5203 if self._match_set(self.PRIMARY_PARSERS): 5204 token_type = self._prev.token_type 5205 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5206 5207 if token_type == TokenType.STRING: 5208 expressions = [primary] 5209 while self._match(TokenType.STRING): 5210 expressions.append(exp.Literal.string(self._prev.text)) 5211 5212 if len(expressions) > 1: 5213 return self.expression(exp.Concat, expressions=expressions) 5214 5215 return primary 5216 5217 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5218 return exp.Literal.number(f"0.{self._prev.text}") 5219 5220 if 
self._match(TokenType.L_PAREN): 5221 comments = self._prev_comments 5222 query = self._parse_select() 5223 5224 if query: 5225 expressions = [query] 5226 else: 5227 expressions = self._parse_expressions() 5228 5229 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5230 5231 if not this and self._match(TokenType.R_PAREN, advance=False): 5232 this = self.expression(exp.Tuple) 5233 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5234 this = self._parse_subquery(this=this, parse_alias=False) 5235 elif isinstance(this, exp.Subquery): 5236 this = self._parse_subquery( 5237 this=self._parse_set_operations(this), parse_alias=False 5238 ) 5239 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5240 this = self.expression(exp.Tuple, expressions=expressions) 5241 else: 5242 this = self.expression(exp.Paren, this=this) 5243 5244 if this: 5245 this.add_comments(comments) 5246 5247 self._match_r_paren(expression=this) 5248 return this 5249 5250 return None 5251 5252 def _parse_field( 5253 self, 5254 any_token: bool = False, 5255 tokens: t.Optional[t.Collection[TokenType]] = None, 5256 anonymous_func: bool = False, 5257 ) -> t.Optional[exp.Expression]: 5258 if anonymous_func: 5259 field = ( 5260 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5261 or self._parse_primary() 5262 ) 5263 else: 5264 field = self._parse_primary() or self._parse_function( 5265 anonymous=anonymous_func, any_token=any_token 5266 ) 5267 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5268 5269 def _parse_function( 5270 self, 5271 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5272 anonymous: bool = False, 5273 optional_parens: bool = True, 5274 any_token: bool = False, 5275 ) -> t.Optional[exp.Expression]: 5276 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5277 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5278 fn_syntax = False 5279 if ( 5280 self._match(TokenType.L_BRACE, advance=False) 5281 and self._next 5282 and self._next.text.upper() == "FN" 5283 ): 5284 self._advance(2) 5285 fn_syntax = True 5286 5287 func = self._parse_function_call( 5288 functions=functions, 5289 anonymous=anonymous, 5290 optional_parens=optional_parens, 5291 any_token=any_token, 5292 ) 5293 5294 if fn_syntax: 5295 self._match(TokenType.R_BRACE) 5296 5297 return func 5298 5299 def _parse_function_call( 5300 self, 5301 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5302 anonymous: bool = False, 5303 optional_parens: bool = True, 5304 any_token: bool = False, 5305 ) -> t.Optional[exp.Expression]: 5306 if not self._curr: 5307 return None 5308 5309 comments = self._curr.comments 5310 token_type = self._curr.token_type 5311 this = self._curr.text 5312 upper = this.upper() 5313 5314 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5315 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5316 self._advance() 5317 return self._parse_window(parser(self)) 5318 5319 if not self._next or self._next.token_type != TokenType.L_PAREN: 5320 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5321 self._advance() 5322 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5323 5324 return None 5325 5326 if any_token: 5327 if token_type in self.RESERVED_TOKENS: 5328 return None 5329 elif token_type not in self.FUNC_TOKENS: 5330 return None 5331 5332 self._advance(2) 5333 5334 parser = self.FUNCTION_PARSERS.get(upper) 5335 if parser and not anonymous: 5336 this = parser(self) 
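# FUNCTION_PARSERS covers functions whose argument syntax is not a plain
# comma-separated list, e.g. EXTRACT(part FROM expr) or TRIM(LEADING x FROM y);
# see _parse_extract and _parse_trim below. Otherwise, the else branch first
# checks for subquery predicates such as EXISTS (SELECT ...), then falls back
# to the FUNCTIONS registry or to a generic exp.Anonymous call.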
5337 else: 5338 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5339 5340 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5341 this = self.expression( 5342 subquery_predicate, comments=comments, this=self._parse_select() 5343 ) 5344 self._match_r_paren() 5345 return this 5346 5347 if functions is None: 5348 functions = self.FUNCTIONS 5349 5350 function = functions.get(upper) 5351 5352 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5353 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5354 5355 if alias: 5356 args = self._kv_to_prop_eq(args) 5357 5358 if function and not anonymous: 5359 if "dialect" in function.__code__.co_varnames: 5360 func = function(args, dialect=self.dialect) 5361 else: 5362 func = function(args) 5363 5364 func = self.validate_expression(func, args) 5365 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5366 func.meta["name"] = this 5367 5368 this = func 5369 else: 5370 if token_type == TokenType.IDENTIFIER: 5371 this = exp.Identifier(this=this, quoted=True) 5372 this = self.expression(exp.Anonymous, this=this, expressions=args) 5373 5374 if isinstance(this, exp.Expression): 5375 this.add_comments(comments) 5376 5377 self._match_r_paren(this) 5378 return self._parse_window(this) 5379 5380 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5381 return expression 5382 5383 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5384 transformed = [] 5385 5386 for index, e in enumerate(expressions): 5387 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5388 if isinstance(e, exp.Alias): 5389 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5390 5391 if not isinstance(e, exp.PropertyEQ): 5392 e = self.expression( 5393 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5394 ) 5395 5396 if isinstance(e.this, exp.Column): 5397 e.this.replace(e.this.this) 5398 else: 5399 e = self._to_prop_eq(e, index) 5400 5401 transformed.append(e) 5402 5403 return transformed 5404 5405 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5406 return self._parse_statement() 5407 5408 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5409 return self._parse_column_def(self._parse_id_var()) 5410 5411 def _parse_user_defined_function( 5412 self, kind: t.Optional[TokenType] = None 5413 ) -> t.Optional[exp.Expression]: 5414 this = self._parse_id_var() 5415 5416 while self._match(TokenType.DOT): 5417 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 5418 5419 if not self._match(TokenType.L_PAREN): 5420 return this 5421 5422 expressions = self._parse_csv(self._parse_function_parameter) 5423 self._match_r_paren() 5424 return self.expression( 5425 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5426 ) 5427 5428 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5429 literal = self._parse_primary() 5430 if literal: 5431 return self.expression(exp.Introducer, this=token.text, expression=literal) 5432 5433 return self.expression(exp.Identifier, this=token.text) 5434 5435 def _parse_session_parameter(self) -> exp.SessionParameter: 5436 kind = None 5437 this = self._parse_id_var() or self._parse_primary() 5438 5439 if this and self._match(TokenType.DOT): 5440 kind = this.name 5441 this = self._parse_var() or self._parse_primary() 5442 5443 return self.expression(exp.SessionParameter, this=this, kind=kind) 
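# A sketch of the shapes _parse_lambda below accepts (dialect support varies):
# a bare argument head as in `x -> x + 1`, or a parenthesized one as in
# `(acc, x) -> acc + x`, e.g. DuckDB's LIST_TRANSFORM(l, x -> x + 1). If no
# operator from LAMBDAS follows the head, the parser retreats and re-parses
# the tokens as an ordinary expression (or DISTINCT ..., for aggregate args).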
5444 5445 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5446 return self._parse_id_var() 5447 5448 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5449 index = self._index 5450 5451 if self._match(TokenType.L_PAREN): 5452 expressions = t.cast( 5453 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5454 ) 5455 5456 if not self._match(TokenType.R_PAREN): 5457 self._retreat(index) 5458 else: 5459 expressions = [self._parse_lambda_arg()] 5460 5461 if self._match_set(self.LAMBDAS): 5462 return self.LAMBDAS[self._prev.token_type](self, expressions) 5463 5464 self._retreat(index) 5465 5466 this: t.Optional[exp.Expression] 5467 5468 if self._match(TokenType.DISTINCT): 5469 this = self.expression( 5470 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5471 ) 5472 else: 5473 this = self._parse_select_or_expression(alias=alias) 5474 5475 return self._parse_limit( 5476 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5477 ) 5478 5479 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5480 index = self._index 5481 if not self._match(TokenType.L_PAREN): 5482 return this 5483 5484 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 5485 # expr can be of both types 5486 if self._match_set(self.SELECT_START_TOKENS): 5487 self._retreat(index) 5488 return this 5489 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5490 self._match_r_paren() 5491 return self.expression(exp.Schema, this=this, expressions=args) 5492 5493 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5494 return self._parse_column_def(self._parse_field(any_token=True)) 5495 5496 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5497 # column defs are not really columns, they're identifiers 5498 if isinstance(this, exp.Column): 5499 this = this.this 5500 5501 kind = self._parse_types(schema=True) 5502 5503 if self._match_text_seq("FOR", "ORDINALITY"): 5504 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5505 5506 constraints: t.List[exp.Expression] = [] 5507 5508 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5509 ("ALIAS", "MATERIALIZED") 5510 ): 5511 persisted = self._prev.text.upper() == "MATERIALIZED" 5512 constraint_kind = exp.ComputedColumnConstraint( 5513 this=self._parse_assignment(), 5514 persisted=persisted or self._match_text_seq("PERSISTED"), 5515 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5516 ) 5517 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5518 elif ( 5519 kind 5520 and self._match(TokenType.ALIAS, advance=False) 5521 and ( 5522 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 5523 or (self._next and self._next.token_type == TokenType.L_PAREN) 5524 ) 5525 ): 5526 self._advance() 5527 constraints.append( 5528 self.expression( 5529 exp.ColumnConstraint, 5530 kind=exp.TransformColumnConstraint(this=self._parse_disjunction()), 5531 ) 5532 ) 5533 5534 while True: 5535 constraint = self._parse_column_constraint() 5536 if not constraint: 5537 break 5538 constraints.append(constraint) 5539 5540 if not kind and not constraints: 5541 return this 5542 5543 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5544 5545 def _parse_auto_increment( 5546 self, 5547 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 
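# Handles both the wrapped form AUTO_INCREMENT(start, increment) and the
# textual form START <expr> INCREMENT <expr>; only when both values are
# present does this produce a GeneratedAsIdentityColumnConstraint.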
5548 start = None 5549 increment = None 5550 5551 if self._match(TokenType.L_PAREN, advance=False): 5552 args = self._parse_wrapped_csv(self._parse_bitwise) 5553 start = seq_get(args, 0) 5554 increment = seq_get(args, 1) 5555 elif self._match_text_seq("START"): 5556 start = self._parse_bitwise() 5557 self._match_text_seq("INCREMENT") 5558 increment = self._parse_bitwise() 5559 5560 if start and increment: 5561 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 5562 5563 return exp.AutoIncrementColumnConstraint() 5564 5565 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5566 if not self._match_text_seq("REFRESH"): 5567 self._retreat(self._index - 1) 5568 return None 5569 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5570 5571 def _parse_compress(self) -> exp.CompressColumnConstraint: 5572 if self._match(TokenType.L_PAREN, advance=False): 5573 return self.expression( 5574 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5575 ) 5576 5577 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5578 5579 def _parse_generated_as_identity( 5580 self, 5581 ) -> ( 5582 exp.GeneratedAsIdentityColumnConstraint 5583 | exp.ComputedColumnConstraint 5584 | exp.GeneratedAsRowColumnConstraint 5585 ): 5586 if self._match_text_seq("BY", "DEFAULT"): 5587 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5588 this = self.expression( 5589 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5590 ) 5591 else: 5592 self._match_text_seq("ALWAYS") 5593 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5594 5595 self._match(TokenType.ALIAS) 5596 5597 if self._match_text_seq("ROW"): 5598 start = self._match_text_seq("START") 5599 if not start: 5600 self._match(TokenType.END) 5601 hidden = self._match_text_seq("HIDDEN") 5602 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5603 5604 identity = self._match_text_seq("IDENTITY") 5605 5606 if self._match(TokenType.L_PAREN): 5607 if self._match(TokenType.START_WITH): 5608 this.set("start", self._parse_bitwise()) 5609 if self._match_text_seq("INCREMENT", "BY"): 5610 this.set("increment", self._parse_bitwise()) 5611 if self._match_text_seq("MINVALUE"): 5612 this.set("minvalue", self._parse_bitwise()) 5613 if self._match_text_seq("MAXVALUE"): 5614 this.set("maxvalue", self._parse_bitwise()) 5615 5616 if self._match_text_seq("CYCLE"): 5617 this.set("cycle", True) 5618 elif self._match_text_seq("NO", "CYCLE"): 5619 this.set("cycle", False) 5620 5621 if not identity: 5622 this.set("expression", self._parse_range()) 5623 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5624 args = self._parse_csv(self._parse_bitwise) 5625 this.set("start", seq_get(args, 0)) 5626 this.set("increment", seq_get(args, 1)) 5627 5628 self._match_r_paren() 5629 5630 return this 5631 5632 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5633 self._match_text_seq("LENGTH") 5634 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5635 5636 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5637 if self._match_text_seq("NULL"): 5638 return self.expression(exp.NotNullColumnConstraint) 5639 if self._match_text_seq("CASESPECIFIC"): 5640 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5641 if self._match_text_seq("FOR", "REPLICATION"): 5642 return 
self.expression(exp.NotForReplicationColumnConstraint) 5643 5644 # Unconsume the `NOT` token 5645 self._retreat(self._index - 1) 5646 return None 5647 5648 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5649 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 5650 5651 procedure_option_follows = ( 5652 self._match(TokenType.WITH, advance=False) 5653 and self._next 5654 and self._next.text.upper() in self.PROCEDURE_OPTIONS 5655 ) 5656 5657 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 5658 return self.expression( 5659 exp.ColumnConstraint, 5660 this=this, 5661 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5662 ) 5663 5664 return this 5665 5666 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5667 if not self._match(TokenType.CONSTRAINT): 5668 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5669 5670 return self.expression( 5671 exp.Constraint, 5672 this=self._parse_id_var(), 5673 expressions=self._parse_unnamed_constraints(), 5674 ) 5675 5676 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5677 constraints = [] 5678 while True: 5679 constraint = self._parse_unnamed_constraint() or self._parse_function() 5680 if not constraint: 5681 break 5682 constraints.append(constraint) 5683 5684 return constraints 5685 5686 def _parse_unnamed_constraint( 5687 self, constraints: t.Optional[t.Collection[str]] = None 5688 ) -> t.Optional[exp.Expression]: 5689 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5690 constraints or self.CONSTRAINT_PARSERS 5691 ): 5692 return None 5693 5694 constraint = self._prev.text.upper() 5695 if constraint not in self.CONSTRAINT_PARSERS: 5696 self.raise_error(f"No parser found for schema constraint {constraint}.") 5697 5698 return self.CONSTRAINT_PARSERS[constraint](self) 5699 5700 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5701 return self._parse_id_var(any_token=False) 5702 5703 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5704 self._match_text_seq("KEY") 5705 return self.expression( 5706 exp.UniqueColumnConstraint, 5707 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5708 this=self._parse_schema(self._parse_unique_key()), 5709 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5710 on_conflict=self._parse_on_conflict(), 5711 ) 5712 5713 def _parse_key_constraint_options(self) -> t.List[str]: 5714 options = [] 5715 while True: 5716 if not self._curr: 5717 break 5718 5719 if self._match(TokenType.ON): 5720 action = None 5721 on = self._advance_any() and self._prev.text 5722 5723 if self._match_text_seq("NO", "ACTION"): 5724 action = "NO ACTION" 5725 elif self._match_text_seq("CASCADE"): 5726 action = "CASCADE" 5727 elif self._match_text_seq("RESTRICT"): 5728 action = "RESTRICT" 5729 elif self._match_pair(TokenType.SET, TokenType.NULL): 5730 action = "SET NULL" 5731 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5732 action = "SET DEFAULT" 5733 else: 5734 self.raise_error("Invalid key constraint") 5735 5736 options.append(f"ON {on} {action}") 5737 else: 5738 var = self._parse_var_from_options( 5739 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5740 ) 5741 if not var: 5742 break 5743 options.append(var.name) 5744 5745 return options 5746 5747 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5748 if match and not self._match(TokenType.REFERENCES): 5749 return None 5750 5751 expressions = None 
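# e.g. REFERENCES other_table (other_col) ON DELETE CASCADE: the referenced
# table is parsed with an optional column schema, followed by key options
# such as the ON <event> <action> clauses built by
# _parse_key_constraint_options above.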
5752 this = self._parse_table(schema=True) 5753 options = self._parse_key_constraint_options() 5754 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5755 5756 def _parse_foreign_key(self) -> exp.ForeignKey: 5757 expressions = self._parse_wrapped_id_vars() 5758 reference = self._parse_references() 5759 options = {} 5760 5761 while self._match(TokenType.ON): 5762 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5763 self.raise_error("Expected DELETE or UPDATE") 5764 5765 kind = self._prev.text.lower() 5766 5767 if self._match_text_seq("NO", "ACTION"): 5768 action = "NO ACTION" 5769 elif self._match(TokenType.SET): 5770 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5771 action = "SET " + self._prev.text.upper() 5772 else: 5773 self._advance() 5774 action = self._prev.text.upper() 5775 5776 options[kind] = action 5777 5778 return self.expression( 5779 exp.ForeignKey, 5780 expressions=expressions, 5781 reference=reference, 5782 **options, # type: ignore 5783 ) 5784 5785 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5786 return self._parse_field() 5787 5788 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5789 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5790 self._retreat(self._index - 1) 5791 return None 5792 5793 id_vars = self._parse_wrapped_id_vars() 5794 return self.expression( 5795 exp.PeriodForSystemTimeConstraint, 5796 this=seq_get(id_vars, 0), 5797 expression=seq_get(id_vars, 1), 5798 ) 5799 5800 def _parse_primary_key( 5801 self, wrapped_optional: bool = False, in_props: bool = False 5802 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5803 desc = ( 5804 self._match_set((TokenType.ASC, TokenType.DESC)) 5805 and self._prev.token_type == TokenType.DESC 5806 ) 5807 5808 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5809 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5810 5811 expressions = self._parse_wrapped_csv( 5812 self._parse_primary_key_part, optional=wrapped_optional 5813 ) 5814 options = self._parse_key_constraint_options() 5815 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5816 5817 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5818 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5819 5820 def _parse_odbc_datetime_literal(self) -> exp.Expression: 5821 """ 5822 Parses a datetime column in ODBC format. We parse the column into the corresponding 5823 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 5824 same as we did for `DATE('yyyy-mm-dd')`. 
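        For example, `{d '2024-01-31'}` produces the same tree as
        `DATE('2024-01-31')`; the `{t '...'}` and `{ts '...'}` escapes map to
        the corresponding time and timestamp expressions via
        ODBC_DATETIME_LITERALS in the same way.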
5825 5826 Reference: 5827 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 5828 """ 5829 self._match(TokenType.VAR) 5830 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 5831 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 5832 if not self._match(TokenType.R_BRACE): 5833 self.raise_error("Expected }") 5834 return expression 5835 5836 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5837 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5838 return this 5839 5840 bracket_kind = self._prev.token_type 5841 if ( 5842 bracket_kind == TokenType.L_BRACE 5843 and self._curr 5844 and self._curr.token_type == TokenType.VAR 5845 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 5846 ): 5847 return self._parse_odbc_datetime_literal() 5848 5849 expressions = self._parse_csv( 5850 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5851 ) 5852 5853 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5854 self.raise_error("Expected ]") 5855 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5856 self.raise_error("Expected }") 5857 5858 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5859 if bracket_kind == TokenType.L_BRACE: 5860 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5861 elif not this: 5862 this = build_array_constructor( 5863 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 5864 ) 5865 else: 5866 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5867 if constructor_type: 5868 return build_array_constructor( 5869 constructor_type, 5870 args=expressions, 5871 bracket_kind=bracket_kind, 5872 dialect=self.dialect, 5873 ) 5874 5875 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5876 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5877 5878 self._add_comments(this) 5879 return self._parse_bracket(this) 5880 5881 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5882 if self._match(TokenType.COLON): 5883 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5884 return this 5885 5886 def _parse_case(self) -> t.Optional[exp.Expression]: 5887 ifs = [] 5888 default = None 5889 5890 comments = self._prev_comments 5891 expression = self._parse_assignment() 5892 5893 while self._match(TokenType.WHEN): 5894 this = self._parse_assignment() 5895 self._match(TokenType.THEN) 5896 then = self._parse_assignment() 5897 ifs.append(self.expression(exp.If, this=this, true=then)) 5898 5899 if self._match(TokenType.ELSE): 5900 default = self._parse_assignment() 5901 5902 if not self._match(TokenType.END): 5903 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5904 default = exp.column("interval") 5905 else: 5906 self.raise_error("Expected END after CASE", self._prev) 5907 5908 return self.expression( 5909 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5910 ) 5911 5912 def _parse_if(self) -> t.Optional[exp.Expression]: 5913 if self._match(TokenType.L_PAREN): 5914 args = self._parse_csv(self._parse_assignment) 5915 this = self.validate_expression(exp.If.from_arg_list(args), args) 5916 self._match_r_paren() 5917 else: 5918 index = self._index - 1 5919 5920 if self.NO_PAREN_IF_COMMANDS and index == 0: 5921 
return self._parse_as_command(self._prev) 5922 5923 condition = self._parse_assignment() 5924 5925 if not condition: 5926 self._retreat(index) 5927 return None 5928 5929 self._match(TokenType.THEN) 5930 true = self._parse_assignment() 5931 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5932 self._match(TokenType.END) 5933 this = self.expression(exp.If, this=condition, true=true, false=false) 5934 5935 return this 5936 5937 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5938 if not self._match_text_seq("VALUE", "FOR"): 5939 self._retreat(self._index - 1) 5940 return None 5941 5942 return self.expression( 5943 exp.NextValueFor, 5944 this=self._parse_column(), 5945 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5946 ) 5947 5948 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 5949 this = self._parse_function() or self._parse_var_or_string(upper=True) 5950 5951 if self._match(TokenType.FROM): 5952 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5953 5954 if not self._match(TokenType.COMMA): 5955 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5956 5957 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5958 5959 def _parse_gap_fill(self) -> exp.GapFill: 5960 self._match(TokenType.TABLE) 5961 this = self._parse_table() 5962 5963 self._match(TokenType.COMMA) 5964 args = [this, *self._parse_csv(self._parse_lambda)] 5965 5966 gap_fill = exp.GapFill.from_arg_list(args) 5967 return self.validate_expression(gap_fill, args) 5968 5969 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5970 this = self._parse_assignment() 5971 5972 if not self._match(TokenType.ALIAS): 5973 if self._match(TokenType.COMMA): 5974 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5975 5976 self.raise_error("Expected AS after CAST") 5977 5978 fmt = None 5979 to = self._parse_types() 5980 5981 if self._match(TokenType.FORMAT): 5982 fmt_string = self._parse_string() 5983 fmt = self._parse_at_time_zone(fmt_string) 5984 5985 if not to: 5986 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5987 if to.this in exp.DataType.TEMPORAL_TYPES: 5988 this = self.expression( 5989 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5990 this=this, 5991 format=exp.Literal.string( 5992 format_time( 5993 fmt_string.this if fmt_string else "", 5994 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5995 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5996 ) 5997 ), 5998 safe=safe, 5999 ) 6000 6001 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6002 this.set("zone", fmt.args["zone"]) 6003 return this 6004 elif not to: 6005 self.raise_error("Expected TYPE after CAST") 6006 elif isinstance(to, exp.Identifier): 6007 to = exp.DataType.build(to.name, udt=True) 6008 elif to.this == exp.DataType.Type.CHAR: 6009 if self._match(TokenType.CHARACTER_SET): 6010 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6011 6012 return self.expression( 6013 exp.Cast if strict else exp.TryCast, 6014 this=this, 6015 to=to, 6016 format=fmt, 6017 safe=safe, 6018 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6019 ) 6020 6021 def _parse_string_agg(self) -> exp.GroupConcat: 6022 if self._match(TokenType.DISTINCT): 6023 args: t.List[t.Optional[exp.Expression]] = [ 6024 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 
6025 ] 6026 if self._match(TokenType.COMMA): 6027 args.extend(self._parse_csv(self._parse_assignment)) 6028 else: 6029 args = self._parse_csv(self._parse_assignment) # type: ignore 6030 6031 if self._match_text_seq("ON", "OVERFLOW"): 6032 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6033 if self._match_text_seq("ERROR"): 6034 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6035 else: 6036 self._match_text_seq("TRUNCATE") 6037 on_overflow = self.expression( 6038 exp.OverflowTruncateBehavior, 6039 this=self._parse_string(), 6040 with_count=( 6041 self._match_text_seq("WITH", "COUNT") 6042 or not self._match_text_seq("WITHOUT", "COUNT") 6043 ), 6044 ) 6045 else: 6046 on_overflow = None 6047 6048 index = self._index 6049 if not self._match(TokenType.R_PAREN) and args: 6050 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6051 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6052 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 6053 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6054 6055 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6056 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6057 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 6058 if not self._match_text_seq("WITHIN", "GROUP"): 6059 self._retreat(index) 6060 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6061 6062 # The corresponding match_r_paren will be called in parse_function (caller) 6063 self._match_l_paren() 6064 6065 return self.expression( 6066 exp.GroupConcat, 6067 this=self._parse_order(this=seq_get(args, 0)), 6068 separator=seq_get(args, 1), 6069 on_overflow=on_overflow, 6070 ) 6071 6072 def _parse_convert( 6073 self, strict: bool, safe: t.Optional[bool] = None 6074 ) -> t.Optional[exp.Expression]: 6075 this = self._parse_bitwise() 6076 6077 if self._match(TokenType.USING): 6078 to: t.Optional[exp.Expression] = self.expression( 6079 exp.CharacterSet, this=self._parse_var() 6080 ) 6081 elif self._match(TokenType.COMMA): 6082 to = self._parse_types() 6083 else: 6084 to = None 6085 6086 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 6087 6088 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 6089 """ 6090 There are generally two variants of the DECODE function: 6091 6092 - DECODE(bin, charset) 6093 - DECODE(expression, search, result [, search, result] ... [, default]) 6094 6095 The second variant will always be parsed into a CASE expression. Note that NULL 6096 needs special treatment, since we need to explicitly check for it with `IS NULL`, 6097 instead of relying on pattern matching. 
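        For example, DECODE(x, 1, 'one', NULL, 'none', 'other') is parsed as:

            CASE
                WHEN x = 1 THEN 'one'
                WHEN x IS NULL THEN 'none'
                ELSE 'other'
            END

        A search value that is neither a literal nor NULL, say y, instead
        yields the null-safe condition x = y OR (x IS NULL AND y IS NULL).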
6098 """ 6099 args = self._parse_csv(self._parse_assignment) 6100 6101 if len(args) < 3: 6102 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6103 6104 expression, *expressions = args 6105 if not expression: 6106 return None 6107 6108 ifs = [] 6109 for search, result in zip(expressions[::2], expressions[1::2]): 6110 if not search or not result: 6111 return None 6112 6113 if isinstance(search, exp.Literal): 6114 ifs.append( 6115 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 6116 ) 6117 elif isinstance(search, exp.Null): 6118 ifs.append( 6119 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 6120 ) 6121 else: 6122 cond = exp.or_( 6123 exp.EQ(this=expression.copy(), expression=search), 6124 exp.and_( 6125 exp.Is(this=expression.copy(), expression=exp.Null()), 6126 exp.Is(this=search.copy(), expression=exp.Null()), 6127 copy=False, 6128 ), 6129 copy=False, 6130 ) 6131 ifs.append(exp.If(this=cond, true=result)) 6132 6133 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6134 6135 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6136 self._match_text_seq("KEY") 6137 key = self._parse_column() 6138 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6139 self._match_text_seq("VALUE") 6140 value = self._parse_bitwise() 6141 6142 if not key and not value: 6143 return None 6144 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6145 6146 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6147 if not this or not self._match_text_seq("FORMAT", "JSON"): 6148 return this 6149 6150 return self.expression(exp.FormatJson, this=this) 6151 6152 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6153 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6154 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6155 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6156 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6157 else: 6158 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6159 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6160 6161 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6162 6163 if not empty and not error and not null: 6164 return None 6165 6166 return self.expression( 6167 exp.OnCondition, 6168 empty=empty, 6169 error=error, 6170 null=null, 6171 ) 6172 6173 def _parse_on_handling( 6174 self, on: str, *values: str 6175 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6176 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6177 for value in values: 6178 if self._match_text_seq(value, "ON", on): 6179 return f"{value} ON {on}" 6180 6181 index = self._index 6182 if self._match(TokenType.DEFAULT): 6183 default_value = self._parse_bitwise() 6184 if self._match_text_seq("ON", on): 6185 return default_value 6186 6187 self._retreat(index) 6188 6189 return None 6190 6191 @t.overload 6192 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6193 6194 @t.overload 6195 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
6196 6197 def _parse_json_object(self, agg=False): 6198 star = self._parse_star() 6199 expressions = ( 6200 [star] 6201 if star 6202 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6203 ) 6204 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6205 6206 unique_keys = None 6207 if self._match_text_seq("WITH", "UNIQUE"): 6208 unique_keys = True 6209 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6210 unique_keys = False 6211 6212 self._match_text_seq("KEYS") 6213 6214 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6215 self._parse_type() 6216 ) 6217 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6218 6219 return self.expression( 6220 exp.JSONObjectAgg if agg else exp.JSONObject, 6221 expressions=expressions, 6222 null_handling=null_handling, 6223 unique_keys=unique_keys, 6224 return_type=return_type, 6225 encoding=encoding, 6226 ) 6227 6228 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6229 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6230 if not self._match_text_seq("NESTED"): 6231 this = self._parse_id_var() 6232 kind = self._parse_types(allow_identifiers=False) 6233 nested = None 6234 else: 6235 this = None 6236 kind = None 6237 nested = True 6238 6239 path = self._match_text_seq("PATH") and self._parse_string() 6240 nested_schema = nested and self._parse_json_schema() 6241 6242 return self.expression( 6243 exp.JSONColumnDef, 6244 this=this, 6245 kind=kind, 6246 path=path, 6247 nested_schema=nested_schema, 6248 ) 6249 6250 def _parse_json_schema(self) -> exp.JSONSchema: 6251 self._match_text_seq("COLUMNS") 6252 return self.expression( 6253 exp.JSONSchema, 6254 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6255 ) 6256 6257 def _parse_json_table(self) -> exp.JSONTable: 6258 this = self._parse_format_json(self._parse_bitwise()) 6259 path = self._match(TokenType.COMMA) and self._parse_string() 6260 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6261 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6262 schema = self._parse_json_schema() 6263 6264 return exp.JSONTable( 6265 this=this, 6266 schema=schema, 6267 path=path, 6268 error_handling=error_handling, 6269 empty_handling=empty_handling, 6270 ) 6271 6272 def _parse_match_against(self) -> exp.MatchAgainst: 6273 expressions = self._parse_csv(self._parse_column) 6274 6275 self._match_text_seq(")", "AGAINST", "(") 6276 6277 this = self._parse_string() 6278 6279 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6280 modifier = "IN NATURAL LANGUAGE MODE" 6281 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6282 modifier = f"{modifier} WITH QUERY EXPANSION" 6283 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6284 modifier = "IN BOOLEAN MODE" 6285 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6286 modifier = "WITH QUERY EXPANSION" 6287 else: 6288 modifier = None 6289 6290 return self.expression( 6291 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6292 ) 6293 6294 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6295 def _parse_open_json(self) -> exp.OpenJSON: 6296 this = self._parse_bitwise() 6297 path = self._match(TokenType.COMMA) and self._parse_string() 6298 6299 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6300 this = self._parse_field(any_token=True) 6301 kind = self._parse_types() 6302 path = 
self._parse_string() 6303 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6304 6305 return self.expression( 6306 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6307 ) 6308 6309 expressions = None 6310 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6311 self._match_l_paren() 6312 expressions = self._parse_csv(_parse_open_json_column_def) 6313 6314 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6315 6316 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6317 args = self._parse_csv(self._parse_bitwise) 6318 6319 if self._match(TokenType.IN): 6320 return self.expression( 6321 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6322 ) 6323 6324 if haystack_first: 6325 haystack = seq_get(args, 0) 6326 needle = seq_get(args, 1) 6327 else: 6328 needle = seq_get(args, 0) 6329 haystack = seq_get(args, 1) 6330 6331 return self.expression( 6332 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6333 ) 6334 6335 def _parse_predict(self) -> exp.Predict: 6336 self._match_text_seq("MODEL") 6337 this = self._parse_table() 6338 6339 self._match(TokenType.COMMA) 6340 self._match_text_seq("TABLE") 6341 6342 return self.expression( 6343 exp.Predict, 6344 this=this, 6345 expression=self._parse_table(), 6346 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6347 ) 6348 6349 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6350 args = self._parse_csv(self._parse_table) 6351 return exp.JoinHint(this=func_name.upper(), expressions=args) 6352 6353 def _parse_substring(self) -> exp.Substring: 6354 # Postgres supports the form: substring(string [from int] [for int]) 6355 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6356 6357 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6358 6359 if self._match(TokenType.FROM): 6360 args.append(self._parse_bitwise()) 6361 if self._match(TokenType.FOR): 6362 if len(args) == 1: 6363 args.append(exp.Literal.number(1)) 6364 args.append(self._parse_bitwise()) 6365 6366 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6367 6368 def _parse_trim(self) -> exp.Trim: 6369 # https://www.w3resource.com/sql/character-functions/trim.php 6370 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6371 6372 position = None 6373 collation = None 6374 expression = None 6375 6376 if self._match_texts(self.TRIM_TYPES): 6377 position = self._prev.text.upper() 6378 6379 this = self._parse_bitwise() 6380 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6381 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6382 expression = self._parse_bitwise() 6383 6384 if invert_order: 6385 this, expression = expression, this 6386 6387 if self._match(TokenType.COLLATE): 6388 collation = self._parse_bitwise() 6389 6390 return self.expression( 6391 exp.Trim, this=this, position=position, expression=expression, collation=collation 6392 ) 6393 6394 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6395 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6396 6397 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6398 return self._parse_window(self._parse_id_var(), alias=True) 6399 6400 def _parse_respect_or_ignore_nulls( 6401 self, this: t.Optional[exp.Expression] 6402 ) -> t.Optional[exp.Expression]: 6403 if self._match_text_seq("IGNORE", "NULLS"): 
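# e.g. a trailing NULLS clause, as in FIRST_VALUE(x IGNORE NULLS)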
6404 return self.expression(exp.IgnoreNulls, this=this) 6405 if self._match_text_seq("RESPECT", "NULLS"): 6406 return self.expression(exp.RespectNulls, this=this) 6407 return this 6408 6409 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6410 if self._match(TokenType.HAVING): 6411 self._match_texts(("MAX", "MIN")) 6412 max = self._prev.text.upper() != "MIN" 6413 return self.expression( 6414 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6415 ) 6416 6417 return this 6418 6419 def _parse_window( 6420 self, this: t.Optional[exp.Expression], alias: bool = False 6421 ) -> t.Optional[exp.Expression]: 6422 func = this 6423 comments = func.comments if isinstance(func, exp.Expression) else None 6424 6425 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6426 self._match(TokenType.WHERE) 6427 this = self.expression( 6428 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6429 ) 6430 self._match_r_paren() 6431 6432 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6433 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6434 if self._match_text_seq("WITHIN", "GROUP"): 6435 order = self._parse_wrapped(self._parse_order) 6436 this = self.expression(exp.WithinGroup, this=this, expression=order) 6437 6438 # The SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] before OVER; 6439 # some dialects choose to implement it and some do not. 6440 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6441 6442 # There is some code above in _parse_lambda that handles 6443 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6444 6445 # The code below handles 6446 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6447 6448 # Oracle allows both formats 6449 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6450 # and Snowflake chose to do the same for familiarity 6451 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6452 if isinstance(this, exp.AggFunc): 6453 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6454 6455 if ignore_respect and ignore_respect is not this: 6456 ignore_respect.replace(ignore_respect.this) 6457 this = self.expression(ignore_respect.__class__, this=this) 6458 6459 this = self._parse_respect_or_ignore_nulls(this) 6460 6461 # BigQuery named windows, e.g. SELECT ... FROM t WINDOW x AS (PARTITION BY ...) 
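# When alias=True (the _parse_named_window path), no OVER keyword is
# consumed: `this` holds the window's name and the parenthesized
# specification that follows is parsed just like an inline OVER (...) clause.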
6462 if alias: 6463 over = None 6464 self._match(TokenType.ALIAS) 6465 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6466 return this 6467 else: 6468 over = self._prev.text.upper() 6469 6470 if comments and isinstance(func, exp.Expression): 6471 func.pop_comments() 6472 6473 if not self._match(TokenType.L_PAREN): 6474 return self.expression( 6475 exp.Window, 6476 comments=comments, 6477 this=this, 6478 alias=self._parse_id_var(False), 6479 over=over, 6480 ) 6481 6482 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6483 6484 first = self._match(TokenType.FIRST) 6485 if self._match_text_seq("LAST"): 6486 first = False 6487 6488 partition, order = self._parse_partition_and_order() 6489 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6490 6491 if kind: 6492 self._match(TokenType.BETWEEN) 6493 start = self._parse_window_spec() 6494 self._match(TokenType.AND) 6495 end = self._parse_window_spec() 6496 6497 spec = self.expression( 6498 exp.WindowSpec, 6499 kind=kind, 6500 start=start["value"], 6501 start_side=start["side"], 6502 end=end["value"], 6503 end_side=end["side"], 6504 ) 6505 else: 6506 spec = None 6507 6508 self._match_r_paren() 6509 6510 window = self.expression( 6511 exp.Window, 6512 comments=comments, 6513 this=this, 6514 partition_by=partition, 6515 order=order, 6516 spec=spec, 6517 alias=window_alias, 6518 over=over, 6519 first=first, 6520 ) 6521 6522 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6523 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6524 return self._parse_window(window, alias=alias) 6525 6526 return window 6527 6528 def _parse_partition_and_order( 6529 self, 6530 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6531 return self._parse_partition_by(), self._parse_order() 6532 6533 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6534 self._match(TokenType.BETWEEN) 6535 6536 return { 6537 "value": ( 6538 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6539 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6540 or self._parse_bitwise() 6541 ), 6542 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6543 } 6544 6545 def _parse_alias( 6546 self, this: t.Optional[exp.Expression], explicit: bool = False 6547 ) -> t.Optional[exp.Expression]: 6548 any_token = self._match(TokenType.ALIAS) 6549 comments = self._prev_comments or [] 6550 6551 if explicit and not any_token: 6552 return this 6553 6554 if self._match(TokenType.L_PAREN): 6555 aliases = self.expression( 6556 exp.Aliases, 6557 comments=comments, 6558 this=this, 6559 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6560 ) 6561 self._match_r_paren(aliases) 6562 return aliases 6563 6564 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6565 self.STRING_ALIASES and self._parse_string_as_identifier() 6566 ) 6567 6568 if alias: 6569 comments.extend(alias.pop_comments()) 6570 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6571 column = this.this 6572 6573 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6574 if not this.comments and column and column.comments: 6575 this.comments = column.pop_comments() 6576 6577 return this 6578 6579 def _parse_id_var( 6580 self, 6581 any_token: bool = True, 6582 tokens: t.Optional[t.Collection[TokenType]] = None, 6583 ) -> t.Optional[exp.Expression]: 6584 expression = self._parse_identifier() 6585 if 
not expression and ( 6586 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6587 ): 6588 quoted = self._prev.token_type == TokenType.STRING 6589 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6590 6591 return expression 6592 6593 def _parse_string(self) -> t.Optional[exp.Expression]: 6594 if self._match_set(self.STRING_PARSERS): 6595 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6596 return self._parse_placeholder() 6597 6598 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6599 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6600 6601 def _parse_number(self) -> t.Optional[exp.Expression]: 6602 if self._match_set(self.NUMERIC_PARSERS): 6603 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6604 return self._parse_placeholder() 6605 6606 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6607 if self._match(TokenType.IDENTIFIER): 6608 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6609 return self._parse_placeholder() 6610 6611 def _parse_var( 6612 self, 6613 any_token: bool = False, 6614 tokens: t.Optional[t.Collection[TokenType]] = None, 6615 upper: bool = False, 6616 ) -> t.Optional[exp.Expression]: 6617 if ( 6618 (any_token and self._advance_any()) 6619 or self._match(TokenType.VAR) 6620 or (self._match_set(tokens) if tokens else False) 6621 ): 6622 return self.expression( 6623 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6624 ) 6625 return self._parse_placeholder() 6626 6627 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6628 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6629 self._advance() 6630 return self._prev 6631 return None 6632 6633 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6634 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6635 6636 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6637 return self._parse_primary() or self._parse_var(any_token=True) 6638 6639 def _parse_null(self) -> t.Optional[exp.Expression]: 6640 if self._match_set(self.NULL_TOKENS): 6641 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6642 return self._parse_placeholder() 6643 6644 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6645 if self._match(TokenType.TRUE): 6646 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6647 if self._match(TokenType.FALSE): 6648 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6649 return self._parse_placeholder() 6650 6651 def _parse_star(self) -> t.Optional[exp.Expression]: 6652 if self._match(TokenType.STAR): 6653 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6654 return self._parse_placeholder() 6655 6656 def _parse_parameter(self) -> exp.Parameter: 6657 this = self._parse_identifier() or self._parse_primary_or_var() 6658 return self.expression(exp.Parameter, this=this) 6659 6660 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6661 if self._match_set(self.PLACEHOLDER_PARSERS): 6662 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6663 if placeholder: 6664 return placeholder 6665 self._advance(-1) 6666 return None 6667 6668 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6669 if not self._match_texts(keywords): 6670 return None 6671 if self._match(TokenType.L_PAREN, 
advance=False): 6672 return self._parse_wrapped_csv(self._parse_expression) 6673 6674 expression = self._parse_expression() 6675 return [expression] if expression else None 6676 6677 def _parse_csv( 6678 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6679 ) -> t.List[exp.Expression]: 6680 parse_result = parse_method() 6681 items = [parse_result] if parse_result is not None else [] 6682 6683 while self._match(sep): 6684 self._add_comments(parse_result) 6685 parse_result = parse_method() 6686 if parse_result is not None: 6687 items.append(parse_result) 6688 6689 return items 6690 6691 def _parse_tokens( 6692 self, parse_method: t.Callable, expressions: t.Dict 6693 ) -> t.Optional[exp.Expression]: 6694 this = parse_method() 6695 6696 while self._match_set(expressions): 6697 this = self.expression( 6698 expressions[self._prev.token_type], 6699 this=this, 6700 comments=self._prev_comments, 6701 expression=parse_method(), 6702 ) 6703 6704 return this 6705 6706 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6707 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6708 6709 def _parse_wrapped_csv( 6710 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6711 ) -> t.List[exp.Expression]: 6712 return self._parse_wrapped( 6713 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6714 ) 6715 6716 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6717 wrapped = self._match(TokenType.L_PAREN) 6718 if not wrapped and not optional: 6719 self.raise_error("Expecting (") 6720 parse_result = parse_method() 6721 if wrapped: 6722 self._match_r_paren() 6723 return parse_result 6724 6725 def _parse_expressions(self) -> t.List[exp.Expression]: 6726 return self._parse_csv(self._parse_expression) 6727 6728 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6729 return self._parse_select() or self._parse_set_operations( 6730 self._parse_expression() if alias else self._parse_assignment() 6731 ) 6732 6733 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6734 return self._parse_query_modifiers( 6735 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6736 ) 6737 6738 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6739 this = None 6740 if self._match_texts(self.TRANSACTION_KIND): 6741 this = self._prev.text 6742 6743 self._match_texts(("TRANSACTION", "WORK")) 6744 6745 modes = [] 6746 while True: 6747 mode = [] 6748 while self._match(TokenType.VAR): 6749 mode.append(self._prev.text) 6750 6751 if mode: 6752 modes.append(" ".join(mode)) 6753 if not self._match(TokenType.COMMA): 6754 break 6755 6756 return self.expression(exp.Transaction, this=this, modes=modes) 6757 6758 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6759 chain = None 6760 savepoint = None 6761 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6762 6763 self._match_texts(("TRANSACTION", "WORK")) 6764 6765 if self._match_text_seq("TO"): 6766 self._match_text_seq("SAVEPOINT") 6767 savepoint = self._parse_id_var() 6768 6769 if self._match(TokenType.AND): 6770 chain = not self._match_text_seq("NO") 6771 self._match_text_seq("CHAIN") 6772 6773 if is_rollback: 6774 return self.expression(exp.Rollback, savepoint=savepoint) 6775 6776 return self.expression(exp.Commit, chain=chain) 6777 6778 def _parse_refresh(self) -> exp.Refresh: 6779 self._match(TokenType.TABLE) 6780 return 
self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 6781 6782 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6783 if not self._match_text_seq("ADD"): 6784 return None 6785 6786 self._match(TokenType.COLUMN) 6787 exists_column = self._parse_exists(not_=True) 6788 expression = self._parse_field_def() 6789 6790 if expression: 6791 expression.set("exists", exists_column) 6792 6793 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6794 if self._match_texts(("FIRST", "AFTER")): 6795 position = self._prev.text 6796 column_position = self.expression( 6797 exp.ColumnPosition, this=self._parse_column(), position=position 6798 ) 6799 expression.set("position", column_position) 6800 6801 return expression 6802 6803 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6804 drop = self._match(TokenType.DROP) and self._parse_drop() 6805 if drop and not isinstance(drop, exp.Command): 6806 drop.set("kind", drop.args.get("kind", "COLUMN")) 6807 return drop 6808 6809 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6810 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6811 return self.expression( 6812 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6813 ) 6814 6815 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6816 index = self._index - 1 6817 6818 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6819 return self._parse_csv( 6820 lambda: self.expression( 6821 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6822 ) 6823 ) 6824 6825 self._retreat(index) 6826 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6827 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6828 6829 if self._match_text_seq("ADD", "COLUMNS"): 6830 schema = self._parse_schema() 6831 if schema: 6832 return [schema] 6833 return [] 6834 6835 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6836 6837 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6838 if self._match_texts(self.ALTER_ALTER_PARSERS): 6839 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6840 6841 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6842 # keyword after ALTER we default to parsing this statement 6843 self._match(TokenType.COLUMN) 6844 column = self._parse_field(any_token=True) 6845 6846 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6847 return self.expression(exp.AlterColumn, this=column, drop=True) 6848 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6849 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6850 if self._match(TokenType.COMMENT): 6851 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6852 if self._match_text_seq("DROP", "NOT", "NULL"): 6853 return self.expression( 6854 exp.AlterColumn, 6855 this=column, 6856 drop=True, 6857 allow_null=True, 6858 ) 6859 if self._match_text_seq("SET", "NOT", "NULL"): 6860 return self.expression( 6861 exp.AlterColumn, 6862 this=column, 6863 allow_null=False, 6864 ) 6865 self._match_text_seq("SET", "DATA") 6866 self._match_text_seq("TYPE") 6867 return self.expression( 6868 exp.AlterColumn, 6869 this=column, 6870 dtype=self._parse_types(), 6871 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6872 using=self._match(TokenType.USING) and 
self._parse_assignment(), 6873 ) 6874 6875 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6876 if self._match_texts(("ALL", "EVEN", "AUTO")): 6877 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6878 6879 self._match_text_seq("KEY", "DISTKEY") 6880 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6881 6882 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6883 if compound: 6884 self._match_text_seq("SORTKEY") 6885 6886 if self._match(TokenType.L_PAREN, advance=False): 6887 return self.expression( 6888 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6889 ) 6890 6891 self._match_texts(("AUTO", "NONE")) 6892 return self.expression( 6893 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6894 ) 6895 6896 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6897 index = self._index - 1 6898 6899 partition_exists = self._parse_exists() 6900 if self._match(TokenType.PARTITION, advance=False): 6901 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6902 6903 self._retreat(index) 6904 return self._parse_csv(self._parse_drop_column) 6905 6906 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 6907 if self._match(TokenType.COLUMN): 6908 exists = self._parse_exists() 6909 old_column = self._parse_column() 6910 to = self._match_text_seq("TO") 6911 new_column = self._parse_column() 6912 6913 if old_column is None or to is None or new_column is None: 6914 return None 6915 6916 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6917 6918 self._match_text_seq("TO") 6919 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 6920 6921 def _parse_alter_table_set(self) -> exp.AlterSet: 6922 alter_set = self.expression(exp.AlterSet) 6923 6924 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6925 "TABLE", "PROPERTIES" 6926 ): 6927 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 6928 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6929 alter_set.set("expressions", [self._parse_assignment()]) 6930 elif self._match_texts(("LOGGED", "UNLOGGED")): 6931 alter_set.set("option", exp.var(self._prev.text.upper())) 6932 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6933 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6934 elif self._match_text_seq("LOCATION"): 6935 alter_set.set("location", self._parse_field()) 6936 elif self._match_text_seq("ACCESS", "METHOD"): 6937 alter_set.set("access_method", self._parse_field()) 6938 elif self._match_text_seq("TABLESPACE"): 6939 alter_set.set("tablespace", self._parse_field()) 6940 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6941 alter_set.set("file_format", [self._parse_field()]) 6942 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6943 alter_set.set("file_format", self._parse_wrapped_options()) 6944 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6945 alter_set.set("copy_options", self._parse_wrapped_options()) 6946 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6947 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6948 else: 6949 if self._match_text_seq("SERDE"): 6950 alter_set.set("serde", self._parse_field()) 6951 6952 alter_set.set("expressions", [self._parse_properties()]) 6953 6954 return 
alter_set 6955 6956 def _parse_alter(self) -> exp.Alter | exp.Command: 6957 start = self._prev 6958 6959 alter_token = self._match_set(self.ALTERABLES) and self._prev 6960 if not alter_token: 6961 return self._parse_as_command(start) 6962 6963 exists = self._parse_exists() 6964 only = self._match_text_seq("ONLY") 6965 this = self._parse_table(schema=True) 6966 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6967 6968 if self._next: 6969 self._advance() 6970 6971 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6972 if parser: 6973 actions = ensure_list(parser(self)) 6974 not_valid = self._match_text_seq("NOT", "VALID") 6975 options = self._parse_csv(self._parse_property) 6976 6977 if not self._curr and actions: 6978 return self.expression( 6979 exp.Alter, 6980 this=this, 6981 kind=alter_token.text.upper(), 6982 exists=exists, 6983 actions=actions, 6984 only=only, 6985 options=options, 6986 cluster=cluster, 6987 not_valid=not_valid, 6988 ) 6989 6990 return self._parse_as_command(start) 6991 6992 def _parse_merge(self) -> exp.Merge: 6993 self._match(TokenType.INTO) 6994 target = self._parse_table() 6995 6996 if target and self._match(TokenType.ALIAS, advance=False): 6997 target.set("alias", self._parse_table_alias()) 6998 6999 self._match(TokenType.USING) 7000 using = self._parse_table() 7001 7002 self._match(TokenType.ON) 7003 on = self._parse_assignment() 7004 7005 return self.expression( 7006 exp.Merge, 7007 this=target, 7008 using=using, 7009 on=on, 7010 expressions=self._parse_when_matched(), 7011 returning=self._parse_returning(), 7012 ) 7013 7014 def _parse_when_matched(self) -> t.List[exp.When]: 7015 whens = [] 7016 7017 while self._match(TokenType.WHEN): 7018 matched = not self._match(TokenType.NOT) 7019 self._match_text_seq("MATCHED") 7020 source = ( 7021 False 7022 if self._match_text_seq("BY", "TARGET") 7023 else self._match_text_seq("BY", "SOURCE") 7024 ) 7025 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7026 7027 self._match(TokenType.THEN) 7028 7029 if self._match(TokenType.INSERT): 7030 this = self._parse_star() 7031 if this: 7032 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7033 else: 7034 then = self.expression( 7035 exp.Insert, 7036 this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(), 7037 expression=self._match_text_seq("VALUES") and self._parse_value(), 7038 ) 7039 elif self._match(TokenType.UPDATE): 7040 expressions = self._parse_star() 7041 if expressions: 7042 then = self.expression(exp.Update, expressions=expressions) 7043 else: 7044 then = self.expression( 7045 exp.Update, 7046 expressions=self._match(TokenType.SET) 7047 and self._parse_csv(self._parse_equality), 7048 ) 7049 elif self._match(TokenType.DELETE): 7050 then = self.expression(exp.Var, this=self._prev.text) 7051 else: 7052 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7053 7054 whens.append( 7055 self.expression( 7056 exp.When, 7057 matched=matched, 7058 source=source, 7059 condition=condition, 7060 then=then, 7061 ) 7062 ) 7063 return whens 7064 7065 def _parse_show(self) -> t.Optional[exp.Expression]: 7066 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7067 if parser: 7068 return parser(self) 7069 return self._parse_as_command(self._prev) 7070 7071 def _parse_set_item_assignment( 7072 self, kind: t.Optional[str] = None 7073 ) -> t.Optional[exp.Expression]: 7074 index = self._index 7075 7076 if kind in ("GLOBAL", "SESSION") and 
self._match_text_seq("TRANSACTION"): 7077 return self._parse_set_transaction(global_=kind == "GLOBAL") 7078 7079 left = self._parse_primary() or self._parse_column() 7080 assignment_delimiter = self._match_texts(("=", "TO")) 7081 7082 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7083 self._retreat(index) 7084 return None 7085 7086 right = self._parse_statement() or self._parse_id_var() 7087 if isinstance(right, (exp.Column, exp.Identifier)): 7088 right = exp.var(right.name) 7089 7090 this = self.expression(exp.EQ, this=left, expression=right) 7091 return self.expression(exp.SetItem, this=this, kind=kind) 7092 7093 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7094 self._match_text_seq("TRANSACTION") 7095 characteristics = self._parse_csv( 7096 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7097 ) 7098 return self.expression( 7099 exp.SetItem, 7100 expressions=characteristics, 7101 kind="TRANSACTION", 7102 **{"global": global_}, # type: ignore 7103 ) 7104 7105 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7106 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7107 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7108 7109 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7110 index = self._index 7111 set_ = self.expression( 7112 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7113 ) 7114 7115 if self._curr: 7116 self._retreat(index) 7117 return self._parse_as_command(self._prev) 7118 7119 return set_ 7120 7121 def _parse_var_from_options( 7122 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7123 ) -> t.Optional[exp.Var]: 7124 start = self._curr 7125 if not start: 7126 return None 7127 7128 option = start.text.upper() 7129 continuations = options.get(option) 7130 7131 index = self._index 7132 self._advance() 7133 for keywords in continuations or []: 7134 if isinstance(keywords, str): 7135 keywords = (keywords,) 7136 7137 if self._match_text_seq(*keywords): 7138 option = f"{option} {' '.join(keywords)}" 7139 break 7140 else: 7141 if continuations or continuations is None: 7142 if raise_unmatched: 7143 self.raise_error(f"Unknown option {option}") 7144 7145 self._retreat(index) 7146 return None 7147 7148 return exp.var(option) 7149 7150 def _parse_as_command(self, start: Token) -> exp.Command: 7151 while self._curr: 7152 self._advance() 7153 text = self._find_sql(start, self._prev) 7154 size = len(start.text) 7155 self._warn_unsupported() 7156 return exp.Command(this=text[:size], expression=text[size:]) 7157 7158 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7159 settings = [] 7160 7161 self._match_l_paren() 7162 kind = self._parse_id_var() 7163 7164 if self._match(TokenType.L_PAREN): 7165 while True: 7166 key = self._parse_id_var() 7167 value = self._parse_primary() 7168 if not key and value is None: 7169 break 7170 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7171 self._match(TokenType.R_PAREN) 7172 7173 self._match_r_paren() 7174 7175 return self.expression( 7176 exp.DictProperty, 7177 this=this, 7178 kind=kind.this if kind else None, 7179 settings=settings, 7180 ) 7181 7182 def _parse_dict_range(self, this: str) -> exp.DictRange: 7183 self._match_l_paren() 7184 has_min = self._match_text_seq("MIN") 7185 if has_min: 7186 min = self._parse_var() or self._parse_primary() 7187 self._match_text_seq("MAX") 7188 max = 
self._parse_var() or self._parse_primary() 7189 else: 7190 max = self._parse_var() or self._parse_primary() 7191 min = exp.Literal.number(0) 7192 self._match_r_paren() 7193 return self.expression(exp.DictRange, this=this, min=min, max=max) 7194 7195 def _parse_comprehension( 7196 self, this: t.Optional[exp.Expression] 7197 ) -> t.Optional[exp.Comprehension]: 7198 index = self._index 7199 expression = self._parse_column() 7200 if not self._match(TokenType.IN): 7201 self._retreat(index - 1) 7202 return None 7203 iterator = self._parse_column() 7204 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7205 return self.expression( 7206 exp.Comprehension, 7207 this=this, 7208 expression=expression, 7209 iterator=iterator, 7210 condition=condition, 7211 ) 7212 7213 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7214 if self._match(TokenType.HEREDOC_STRING): 7215 return self.expression(exp.Heredoc, this=self._prev.text) 7216 7217 if not self._match_text_seq("$"): 7218 return None 7219 7220 tags = ["$"] 7221 tag_text = None 7222 7223 if self._is_connected(): 7224 self._advance() 7225 tags.append(self._prev.text.upper()) 7226 else: 7227 self.raise_error("No closing $ found") 7228 7229 if tags[-1] != "$": 7230 if self._is_connected() and self._match_text_seq("$"): 7231 tag_text = tags[-1] 7232 tags.append("$") 7233 else: 7234 self.raise_error("No closing $ found") 7235 7236 heredoc_start = self._curr 7237 7238 while self._curr: 7239 if self._match_text_seq(*tags, advance=False): 7240 this = self._find_sql(heredoc_start, self._prev) 7241 self._advance(len(tags)) 7242 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7243 7244 self._advance() 7245 7246 self.raise_error(f"No closing {''.join(tags)} found") 7247 return None 7248 7249 def _find_parser( 7250 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7251 ) -> t.Optional[t.Callable]: 7252 if not self._curr: 7253 return None 7254 7255 index = self._index 7256 this = [] 7257 while True: 7258 # The current token might be multiple words 7259 curr = self._curr.text.upper() 7260 key = curr.split(" ") 7261 this.append(curr) 7262 7263 self._advance() 7264 result, trie = in_trie(trie, key) 7265 if result == TrieResult.FAILED: 7266 break 7267 7268 if result == TrieResult.EXISTS: 7269 subparser = parsers[" ".join(this)] 7270 return subparser 7271 7272 self._retreat(index) 7273 return None 7274 7275 def _match(self, token_type, advance=True, expression=None): 7276 if not self._curr: 7277 return None 7278 7279 if self._curr.token_type == token_type: 7280 if advance: 7281 self._advance() 7282 self._add_comments(expression) 7283 return True 7284 7285 return None 7286 7287 def _match_set(self, types, advance=True): 7288 if not self._curr: 7289 return None 7290 7291 if self._curr.token_type in types: 7292 if advance: 7293 self._advance() 7294 return True 7295 7296 return None 7297 7298 def _match_pair(self, token_type_a, token_type_b, advance=True): 7299 if not self._curr or not self._next: 7300 return None 7301 7302 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7303 if advance: 7304 self._advance(2) 7305 return True 7306 7307 return None 7308 7309 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7310 if not self._match(TokenType.L_PAREN, expression=expression): 7311 self.raise_error("Expecting (") 7312 7313 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7314 if not self._match(TokenType.R_PAREN, expression=expression): 
7315 self.raise_error("Expecting )") 7316 7317 def _match_texts(self, texts, advance=True): 7318 if ( 7319 self._curr 7320 and self._curr.token_type != TokenType.STRING 7321 and self._curr.text.upper() in texts 7322 ): 7323 if advance: 7324 self._advance() 7325 return True 7326 return None 7327 7328 def _match_text_seq(self, *texts, advance=True): 7329 index = self._index 7330 for text in texts: 7331 if ( 7332 self._curr 7333 and self._curr.token_type != TokenType.STRING 7334 and self._curr.text.upper() == text 7335 ): 7336 self._advance() 7337 else: 7338 self._retreat(index) 7339 return None 7340 7341 if not advance: 7342 self._retreat(index) 7343 7344 return True 7345 7346 def _replace_lambda( 7347 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7348 ) -> t.Optional[exp.Expression]: 7349 if not node: 7350 return node 7351 7352 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7353 7354 for column in node.find_all(exp.Column): 7355 typ = lambda_types.get(column.parts[0].name) 7356 if typ is not None: 7357 dot_or_id = column.to_dot() if column.table else column.this 7358 7359 if typ: 7360 dot_or_id = self.expression( 7361 exp.Cast, 7362 this=dot_or_id, 7363 to=typ, 7364 ) 7365 7366 parent = column.parent 7367 7368 while isinstance(parent, exp.Dot): 7369 if not isinstance(parent.parent, exp.Dot): 7370 parent.replace(dot_or_id) 7371 break 7372 parent = parent.parent 7373 else: 7374 if column is node: 7375 node = dot_or_id 7376 else: 7377 column.replace(dot_or_id) 7378 return node 7379 7380 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7381 start = self._prev 7382 7383 # Not to be confused with TRUNCATE(number, decimals) function call 7384 if self._match(TokenType.L_PAREN): 7385 self._retreat(self._index - 2) 7386 return self._parse_function() 7387 7388 # Clickhouse supports TRUNCATE DATABASE as well 7389 is_database = self._match(TokenType.DATABASE) 7390 7391 self._match(TokenType.TABLE) 7392 7393 exists = self._parse_exists(not_=False) 7394 7395 expressions = self._parse_csv( 7396 lambda: self._parse_table(schema=True, is_db_reference=is_database) 7397 ) 7398 7399 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7400 7401 if self._match_text_seq("RESTART", "IDENTITY"): 7402 identity = "RESTART" 7403 elif self._match_text_seq("CONTINUE", "IDENTITY"): 7404 identity = "CONTINUE" 7405 else: 7406 identity = None 7407 7408 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7409 option = self._prev.text 7410 else: 7411 option = None 7412 7413 partition = self._parse_partition() 7414 7415 # Fallback case 7416 if self._curr: 7417 return self._parse_as_command(start) 7418 7419 return self.expression( 7420 exp.TruncateTable, 7421 expressions=expressions, 7422 is_database=is_database, 7423 exists=exists, 7424 cluster=cluster, 7425 identity=identity, 7426 option=option, 7427 partition=partition, 7428 ) 7429 7430 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7431 this = self._parse_ordered(self._parse_opclass) 7432 7433 if not self._match(TokenType.WITH): 7434 return this 7435 7436 op = self._parse_var(any_token=True) 7437 7438 return self.expression(exp.WithOperator, this=this, op=op) 7439 7440 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 7441 self._match(TokenType.EQ) 7442 self._match(TokenType.L_PAREN) 7443 7444 opts: t.List[t.Optional[exp.Expression]] = [] 7445 while self._curr and not self._match(TokenType.R_PAREN): 7446 if 
self._match_text_seq("FORMAT_NAME", "="): 7447 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 7448 # so we parse it separately to use _parse_field() 7449 prop = self.expression( 7450 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 7451 ) 7452 opts.append(prop) 7453 else: 7454 opts.append(self._parse_property()) 7455 7456 self._match(TokenType.COMMA) 7457 7458 return opts 7459 7460 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 7461 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 7462 7463 options = [] 7464 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 7465 option = self._parse_var(any_token=True) 7466 prev = self._prev.text.upper() 7467 7468 # Different dialects might separate options and values by white space, "=" and "AS" 7469 self._match(TokenType.EQ) 7470 self._match(TokenType.ALIAS) 7471 7472 param = self.expression(exp.CopyParameter, this=option) 7473 7474 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 7475 TokenType.L_PAREN, advance=False 7476 ): 7477 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 7478 param.set("expressions", self._parse_wrapped_options()) 7479 elif prev == "FILE_FORMAT": 7480 # T-SQL's external file format case 7481 param.set("expression", self._parse_field()) 7482 else: 7483 param.set("expression", self._parse_unquoted_field()) 7484 7485 options.append(param) 7486 self._match(sep) 7487 7488 return options 7489 7490 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 7491 expr = self.expression(exp.Credentials) 7492 7493 if self._match_text_seq("STORAGE_INTEGRATION", "="): 7494 expr.set("storage", self._parse_field()) 7495 if self._match_text_seq("CREDENTIALS"): 7496 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 7497 creds = ( 7498 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 7499 ) 7500 expr.set("credentials", creds) 7501 if self._match_text_seq("ENCRYPTION"): 7502 expr.set("encryption", self._parse_wrapped_options()) 7503 if self._match_text_seq("IAM_ROLE"): 7504 expr.set("iam_role", self._parse_field()) 7505 if self._match_text_seq("REGION"): 7506 expr.set("region", self._parse_field()) 7507 7508 return expr 7509 7510 def _parse_file_location(self) -> t.Optional[exp.Expression]: 7511 return self._parse_field() 7512 7513 def _parse_copy(self) -> exp.Copy | exp.Command: 7514 start = self._prev 7515 7516 self._match(TokenType.INTO) 7517 7518 this = ( 7519 self._parse_select(nested=True, parse_subquery_alias=False) 7520 if self._match(TokenType.L_PAREN, advance=False) 7521 else self._parse_table(schema=True) 7522 ) 7523 7524 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 7525 7526 files = self._parse_csv(self._parse_file_location) 7527 credentials = self._parse_credentials() 7528 7529 self._match_text_seq("WITH") 7530 7531 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 7532 7533 # Fallback case 7534 if self._curr: 7535 return self._parse_as_command(start) 7536 7537 return self.expression( 7538 exp.Copy, 7539 this=this, 7540 kind=kind, 7541 credentials=credentials, 7542 files=files, 7543 params=params, 7544 ) 7545 7546 def _parse_normalize(self) -> exp.Normalize: 7547 return self.expression( 7548 exp.Normalize, 7549 this=self._parse_bitwise(), 7550 form=self._match(TokenType.COMMA) and self._parse_var(), 7551 ) 7552 7553 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 7554 if 
self._match_text_seq("COLUMNS", "(", advance=False): 7555 this = self._parse_function() 7556 if isinstance(this, exp.Columns): 7557 this.set("unpack", True) 7558 return this 7559 7560 return self.expression( 7561 exp.Star, 7562 **{ # type: ignore 7563 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 7564 "replace": self._parse_star_op("REPLACE"), 7565 "rename": self._parse_star_op("RENAME"), 7566 }, 7567 ) 7568 7569 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 7570 privilege_parts = [] 7571 7572 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 7573 # (end of privilege list) or L_PAREN (start of column list) are met 7574 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 7575 privilege_parts.append(self._curr.text.upper()) 7576 self._advance() 7577 7578 this = exp.var(" ".join(privilege_parts)) 7579 expressions = ( 7580 self._parse_wrapped_csv(self._parse_column) 7581 if self._match(TokenType.L_PAREN, advance=False) 7582 else None 7583 ) 7584 7585 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 7586 7587 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 7588 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 7589 principal = self._parse_id_var() 7590 7591 if not principal: 7592 return None 7593 7594 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 7595 7596 def _parse_grant(self) -> exp.Grant | exp.Command: 7597 start = self._prev 7598 7599 privileges = self._parse_csv(self._parse_grant_privilege) 7600 7601 self._match(TokenType.ON) 7602 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 7603 7604 # Attempt to parse the securable e.g. MySQL allows names 7605 # such as "foo.*", "*.*" which are not easily parseable yet 7606 securable = self._try_parse(self._parse_table_parts) 7607 7608 if not securable or not self._match_text_seq("TO"): 7609 return self._parse_as_command(start) 7610 7611 principals = self._parse_csv(self._parse_grant_principal) 7612 7613 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 7614 7615 if self._curr: 7616 return self._parse_as_command(start) 7617 7618 return self.expression( 7619 exp.Grant, 7620 privileges=privileges, 7621 kind=kind, 7622 securable=securable, 7623 principals=principals, 7624 grant_option=grant_option, 7625 ) 7626 7627 def _parse_overlay(self) -> exp.Overlay: 7628 return self.expression( 7629 exp.Overlay, 7630 **{ # type: ignore 7631 "this": self._parse_bitwise(), 7632 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 7633 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 7634 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 7635 }, 7636 )
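The rest of the page reproduces the Parser class itself. Its constructor arguments are the ones described in the docstring below; here is a minimal sketch of driving the Tokenizer/Parser pair by hand (in practice sqlglot.parse_one does this wiring for you):

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    tokens = Tokenizer().tokenize("SELECT a, b FROM t WHERE x > 1")

    # error_level, error_message_context and max_errors are the knobs
    # documented in the class docstring below.
    parser = Parser(error_level=ErrorLevel.RAISE, max_errors=3)
    expressions = parser.parse(tokens)  # one expression per parsed statement

    print(expressions[0].sql())  # SELECT a, b FROM t WHERE x > 1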
166class Parser(metaclass=_Parser): 167 """ 168 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 169 170 Args: 171 error_level: The desired error level. 172 Default: ErrorLevel.IMMEDIATE 173 error_message_context: The amount of context to capture from a query string when displaying 174 the error message (in number of characters). 175 Default: 100 176 max_errors: Maximum number of error messages to include in a raised ParseError. 177 This is only relevant if error_level is ErrorLevel.RAISE. 178 Default: 3 179 """ 180 181 FUNCTIONS: t.Dict[str, t.Callable] = { 182 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 183 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 184 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 185 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 186 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 187 ), 188 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 189 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 190 ), 191 "CHAR": lambda args: exp.Chr(expressions=args), 192 "CHR": lambda args: exp.Chr(expressions=args), 193 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 194 "CONCAT": lambda args, dialect: exp.Concat( 195 expressions=args, 196 safe=not dialect.STRICT_STRING_CONCAT, 197 coalesce=dialect.CONCAT_COALESCE, 198 ), 199 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 200 expressions=args, 201 safe=not dialect.STRICT_STRING_CONCAT, 202 coalesce=dialect.CONCAT_COALESCE, 203 ), 204 "CONVERT_TIMEZONE": build_convert_timezone, 205 "DATE_TO_DATE_STR": lambda args: exp.Cast( 206 this=seq_get(args, 0), 207 to=exp.DataType(this=exp.DataType.Type.TEXT), 208 ), 209 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 210 start=seq_get(args, 0), 211 end=seq_get(args, 1), 212 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")), 213 ), 214 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 215 "HEX": build_hex, 216 "INSTR": lambda args: exp.StrPosition(this=seq_get(args, 0), substr=seq_get(args, 1)), 217 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 218 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 219 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 220 "LIKE": build_like, 221 "LOG": build_logarithm, 222 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 223 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 224 "LOWER": build_lower, 225 "LPAD": lambda args: build_pad(args), 226 "LEFTPAD": lambda args: build_pad(args), 227 "LTRIM": lambda args: build_trim(args), 228 "MOD": build_mod, 229 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 230 "RPAD": lambda args: build_pad(args, is_left=False), 231 "RTRIM": lambda args: build_trim(args, is_left=False), 232 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 233 if len(args) != 2 234 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 235 "TIME_TO_TIME_STR": lambda args: exp.Cast( 236 this=seq_get(args, 0), 237 to=exp.DataType(this=exp.DataType.Type.TEXT), 238 ), 239 "TO_HEX": build_hex, 240 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 241 this=exp.Cast( 242 this=seq_get(args, 0), 243 
to=exp.DataType(this=exp.DataType.Type.TEXT), 244 ), 245 start=exp.Literal.number(1), 246 length=exp.Literal.number(10), 247 ), 248 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 249 "UPPER": build_upper, 250 "VAR_MAP": build_var_map, 251 } 252 253 NO_PAREN_FUNCTIONS = { 254 TokenType.CURRENT_DATE: exp.CurrentDate, 255 TokenType.CURRENT_DATETIME: exp.CurrentDate, 256 TokenType.CURRENT_TIME: exp.CurrentTime, 257 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 258 TokenType.CURRENT_USER: exp.CurrentUser, 259 } 260 261 STRUCT_TYPE_TOKENS = { 262 TokenType.NESTED, 263 TokenType.OBJECT, 264 TokenType.STRUCT, 265 TokenType.UNION, 266 } 267 268 NESTED_TYPE_TOKENS = { 269 TokenType.ARRAY, 270 TokenType.LIST, 271 TokenType.LOWCARDINALITY, 272 TokenType.MAP, 273 TokenType.NULLABLE, 274 TokenType.RANGE, 275 *STRUCT_TYPE_TOKENS, 276 } 277 278 ENUM_TYPE_TOKENS = { 279 TokenType.ENUM, 280 TokenType.ENUM8, 281 TokenType.ENUM16, 282 } 283 284 AGGREGATE_TYPE_TOKENS = { 285 TokenType.AGGREGATEFUNCTION, 286 TokenType.SIMPLEAGGREGATEFUNCTION, 287 } 288 289 TYPE_TOKENS = { 290 TokenType.BIT, 291 TokenType.BOOLEAN, 292 TokenType.TINYINT, 293 TokenType.UTINYINT, 294 TokenType.SMALLINT, 295 TokenType.USMALLINT, 296 TokenType.INT, 297 TokenType.UINT, 298 TokenType.BIGINT, 299 TokenType.UBIGINT, 300 TokenType.INT128, 301 TokenType.UINT128, 302 TokenType.INT256, 303 TokenType.UINT256, 304 TokenType.MEDIUMINT, 305 TokenType.UMEDIUMINT, 306 TokenType.FIXEDSTRING, 307 TokenType.FLOAT, 308 TokenType.DOUBLE, 309 TokenType.CHAR, 310 TokenType.NCHAR, 311 TokenType.VARCHAR, 312 TokenType.NVARCHAR, 313 TokenType.BPCHAR, 314 TokenType.TEXT, 315 TokenType.MEDIUMTEXT, 316 TokenType.LONGTEXT, 317 TokenType.MEDIUMBLOB, 318 TokenType.LONGBLOB, 319 TokenType.BINARY, 320 TokenType.VARBINARY, 321 TokenType.JSON, 322 TokenType.JSONB, 323 TokenType.INTERVAL, 324 TokenType.TINYBLOB, 325 TokenType.TINYTEXT, 326 TokenType.TIME, 327 TokenType.TIMETZ, 328 TokenType.TIMESTAMP, 329 TokenType.TIMESTAMP_S, 330 TokenType.TIMESTAMP_MS, 331 TokenType.TIMESTAMP_NS, 332 TokenType.TIMESTAMPTZ, 333 TokenType.TIMESTAMPLTZ, 334 TokenType.TIMESTAMPNTZ, 335 TokenType.DATETIME, 336 TokenType.DATETIME64, 337 TokenType.DATE, 338 TokenType.DATE32, 339 TokenType.INT4RANGE, 340 TokenType.INT4MULTIRANGE, 341 TokenType.INT8RANGE, 342 TokenType.INT8MULTIRANGE, 343 TokenType.NUMRANGE, 344 TokenType.NUMMULTIRANGE, 345 TokenType.TSRANGE, 346 TokenType.TSMULTIRANGE, 347 TokenType.TSTZRANGE, 348 TokenType.TSTZMULTIRANGE, 349 TokenType.DATERANGE, 350 TokenType.DATEMULTIRANGE, 351 TokenType.DECIMAL, 352 TokenType.DECIMAL32, 353 TokenType.DECIMAL64, 354 TokenType.DECIMAL128, 355 TokenType.DECIMAL256, 356 TokenType.UDECIMAL, 357 TokenType.BIGDECIMAL, 358 TokenType.UUID, 359 TokenType.GEOGRAPHY, 360 TokenType.GEOMETRY, 361 TokenType.POINT, 362 TokenType.RING, 363 TokenType.LINESTRING, 364 TokenType.MULTILINESTRING, 365 TokenType.POLYGON, 366 TokenType.MULTIPOLYGON, 367 TokenType.HLLSKETCH, 368 TokenType.HSTORE, 369 TokenType.PSEUDO_TYPE, 370 TokenType.SUPER, 371 TokenType.SERIAL, 372 TokenType.SMALLSERIAL, 373 TokenType.BIGSERIAL, 374 TokenType.XML, 375 TokenType.YEAR, 376 TokenType.UNIQUEIDENTIFIER, 377 TokenType.USERDEFINED, 378 TokenType.MONEY, 379 TokenType.SMALLMONEY, 380 TokenType.ROWVERSION, 381 TokenType.IMAGE, 382 TokenType.VARIANT, 383 TokenType.VECTOR, 384 TokenType.OBJECT, 385 TokenType.OBJECT_IDENTIFIER, 386 TokenType.INET, 387 TokenType.IPADDRESS, 388 TokenType.IPPREFIX, 389 TokenType.IPV4, 390 TokenType.IPV6, 391 
TokenType.UNKNOWN, 392 TokenType.NULL, 393 TokenType.NAME, 394 TokenType.TDIGEST, 395 *ENUM_TYPE_TOKENS, 396 *NESTED_TYPE_TOKENS, 397 *AGGREGATE_TYPE_TOKENS, 398 } 399 400 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 401 TokenType.BIGINT: TokenType.UBIGINT, 402 TokenType.INT: TokenType.UINT, 403 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 404 TokenType.SMALLINT: TokenType.USMALLINT, 405 TokenType.TINYINT: TokenType.UTINYINT, 406 TokenType.DECIMAL: TokenType.UDECIMAL, 407 } 408 409 SUBQUERY_PREDICATES = { 410 TokenType.ANY: exp.Any, 411 TokenType.ALL: exp.All, 412 TokenType.EXISTS: exp.Exists, 413 TokenType.SOME: exp.Any, 414 } 415 416 RESERVED_TOKENS = { 417 *Tokenizer.SINGLE_TOKENS.values(), 418 TokenType.SELECT, 419 } - {TokenType.IDENTIFIER} 420 421 DB_CREATABLES = { 422 TokenType.DATABASE, 423 TokenType.DICTIONARY, 424 TokenType.MODEL, 425 TokenType.SCHEMA, 426 TokenType.SEQUENCE, 427 TokenType.STORAGE_INTEGRATION, 428 TokenType.TABLE, 429 TokenType.TAG, 430 TokenType.VIEW, 431 TokenType.WAREHOUSE, 432 TokenType.STREAMLIT, 433 TokenType.SINK, 434 TokenType.SOURCE, 435 } 436 437 CREATABLES = { 438 TokenType.COLUMN, 439 TokenType.CONSTRAINT, 440 TokenType.FOREIGN_KEY, 441 TokenType.FUNCTION, 442 TokenType.INDEX, 443 TokenType.PROCEDURE, 444 *DB_CREATABLES, 445 } 446 447 ALTERABLES = { 448 TokenType.INDEX, 449 TokenType.TABLE, 450 TokenType.VIEW, 451 } 452 453 # Tokens that can represent identifiers 454 ID_VAR_TOKENS = { 455 TokenType.ALL, 456 TokenType.ATTACH, 457 TokenType.VAR, 458 TokenType.ANTI, 459 TokenType.APPLY, 460 TokenType.ASC, 461 TokenType.ASOF, 462 TokenType.AUTO_INCREMENT, 463 TokenType.BEGIN, 464 TokenType.BPCHAR, 465 TokenType.CACHE, 466 TokenType.CASE, 467 TokenType.COLLATE, 468 TokenType.COMMAND, 469 TokenType.COMMENT, 470 TokenType.COMMIT, 471 TokenType.CONSTRAINT, 472 TokenType.COPY, 473 TokenType.CUBE, 474 TokenType.DEFAULT, 475 TokenType.DELETE, 476 TokenType.DESC, 477 TokenType.DESCRIBE, 478 TokenType.DETACH, 479 TokenType.DICTIONARY, 480 TokenType.DIV, 481 TokenType.END, 482 TokenType.EXECUTE, 483 TokenType.ESCAPE, 484 TokenType.FALSE, 485 TokenType.FIRST, 486 TokenType.FILTER, 487 TokenType.FINAL, 488 TokenType.FORMAT, 489 TokenType.FULL, 490 TokenType.IDENTIFIER, 491 TokenType.IS, 492 TokenType.ISNULL, 493 TokenType.INTERVAL, 494 TokenType.KEEP, 495 TokenType.KILL, 496 TokenType.LEFT, 497 TokenType.LOAD, 498 TokenType.MERGE, 499 TokenType.NATURAL, 500 TokenType.NEXT, 501 TokenType.OFFSET, 502 TokenType.OPERATOR, 503 TokenType.ORDINALITY, 504 TokenType.OVERLAPS, 505 TokenType.OVERWRITE, 506 TokenType.PARTITION, 507 TokenType.PERCENT, 508 TokenType.PIVOT, 509 TokenType.PRAGMA, 510 TokenType.RANGE, 511 TokenType.RECURSIVE, 512 TokenType.REFERENCES, 513 TokenType.REFRESH, 514 TokenType.RENAME, 515 TokenType.REPLACE, 516 TokenType.RIGHT, 517 TokenType.ROLLUP, 518 TokenType.ROW, 519 TokenType.ROWS, 520 TokenType.SEMI, 521 TokenType.SET, 522 TokenType.SETTINGS, 523 TokenType.SHOW, 524 TokenType.TEMPORARY, 525 TokenType.TOP, 526 TokenType.TRUE, 527 TokenType.TRUNCATE, 528 TokenType.UNIQUE, 529 TokenType.UNNEST, 530 TokenType.UNPIVOT, 531 TokenType.UPDATE, 532 TokenType.USE, 533 TokenType.VOLATILE, 534 TokenType.WINDOW, 535 *CREATABLES, 536 *SUBQUERY_PREDICATES, 537 *TYPE_TOKENS, 538 *NO_PAREN_FUNCTIONS, 539 } 540 ID_VAR_TOKENS.remove(TokenType.UNION) 541 542 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 543 544 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 545 TokenType.ANTI, 546 TokenType.APPLY, 547 TokenType.ASOF, 548 TokenType.FULL, 549 TokenType.LEFT, 550 TokenType.LOCK, 551 
TokenType.NATURAL, 552 TokenType.OFFSET, 553 TokenType.RIGHT, 554 TokenType.SEMI, 555 TokenType.WINDOW, 556 } 557 558 ALIAS_TOKENS = ID_VAR_TOKENS 559 560 ARRAY_CONSTRUCTORS = { 561 "ARRAY": exp.Array, 562 "LIST": exp.List, 563 } 564 565 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 566 567 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 568 569 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 570 571 FUNC_TOKENS = { 572 TokenType.COLLATE, 573 TokenType.COMMAND, 574 TokenType.CURRENT_DATE, 575 TokenType.CURRENT_DATETIME, 576 TokenType.CURRENT_TIMESTAMP, 577 TokenType.CURRENT_TIME, 578 TokenType.CURRENT_USER, 579 TokenType.FILTER, 580 TokenType.FIRST, 581 TokenType.FORMAT, 582 TokenType.GLOB, 583 TokenType.IDENTIFIER, 584 TokenType.INDEX, 585 TokenType.ISNULL, 586 TokenType.ILIKE, 587 TokenType.INSERT, 588 TokenType.LIKE, 589 TokenType.MERGE, 590 TokenType.NEXT, 591 TokenType.OFFSET, 592 TokenType.PRIMARY_KEY, 593 TokenType.RANGE, 594 TokenType.REPLACE, 595 TokenType.RLIKE, 596 TokenType.ROW, 597 TokenType.UNNEST, 598 TokenType.VAR, 599 TokenType.LEFT, 600 TokenType.RIGHT, 601 TokenType.SEQUENCE, 602 TokenType.DATE, 603 TokenType.DATETIME, 604 TokenType.TABLE, 605 TokenType.TIMESTAMP, 606 TokenType.TIMESTAMPTZ, 607 TokenType.TRUNCATE, 608 TokenType.WINDOW, 609 TokenType.XOR, 610 *TYPE_TOKENS, 611 *SUBQUERY_PREDICATES, 612 } 613 614 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 615 TokenType.AND: exp.And, 616 } 617 618 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 619 TokenType.COLON_EQ: exp.PropertyEQ, 620 } 621 622 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 623 TokenType.OR: exp.Or, 624 } 625 626 EQUALITY = { 627 TokenType.EQ: exp.EQ, 628 TokenType.NEQ: exp.NEQ, 629 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 630 } 631 632 COMPARISON = { 633 TokenType.GT: exp.GT, 634 TokenType.GTE: exp.GTE, 635 TokenType.LT: exp.LT, 636 TokenType.LTE: exp.LTE, 637 } 638 639 BITWISE = { 640 TokenType.AMP: exp.BitwiseAnd, 641 TokenType.CARET: exp.BitwiseXor, 642 TokenType.PIPE: exp.BitwiseOr, 643 } 644 645 TERM = { 646 TokenType.DASH: exp.Sub, 647 TokenType.PLUS: exp.Add, 648 TokenType.MOD: exp.Mod, 649 TokenType.COLLATE: exp.Collate, 650 } 651 652 FACTOR = { 653 TokenType.DIV: exp.IntDiv, 654 TokenType.LR_ARROW: exp.Distance, 655 TokenType.SLASH: exp.Div, 656 TokenType.STAR: exp.Mul, 657 } 658 659 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 660 661 TIMES = { 662 TokenType.TIME, 663 TokenType.TIMETZ, 664 } 665 666 TIMESTAMPS = { 667 TokenType.TIMESTAMP, 668 TokenType.TIMESTAMPTZ, 669 TokenType.TIMESTAMPLTZ, 670 *TIMES, 671 } 672 673 SET_OPERATIONS = { 674 TokenType.UNION, 675 TokenType.INTERSECT, 676 TokenType.EXCEPT, 677 } 678 679 JOIN_METHODS = { 680 TokenType.ASOF, 681 TokenType.NATURAL, 682 TokenType.POSITIONAL, 683 } 684 685 JOIN_SIDES = { 686 TokenType.LEFT, 687 TokenType.RIGHT, 688 TokenType.FULL, 689 } 690 691 JOIN_KINDS = { 692 TokenType.ANTI, 693 TokenType.CROSS, 694 TokenType.INNER, 695 TokenType.OUTER, 696 TokenType.SEMI, 697 TokenType.STRAIGHT_JOIN, 698 } 699 700 JOIN_HINTS: t.Set[str] = set() 701 702 LAMBDAS = { 703 TokenType.ARROW: lambda self, expressions: self.expression( 704 exp.Lambda, 705 this=self._replace_lambda( 706 self._parse_assignment(), 707 expressions, 708 ), 709 expressions=expressions, 710 ), 711 TokenType.FARROW: lambda self, expressions: self.expression( 712 exp.Kwarg, 713 this=exp.var(expressions[0].name), 714 expression=self._parse_assignment(), 715 ), 716 } 717 718 COLUMN_OPERATORS = { 719 
TokenType.DOT: None, 720 TokenType.DCOLON: lambda self, this, to: self.expression( 721 exp.Cast if self.STRICT_CAST else exp.TryCast, 722 this=this, 723 to=to, 724 ), 725 TokenType.ARROW: lambda self, this, path: self.expression( 726 exp.JSONExtract, 727 this=this, 728 expression=self.dialect.to_json_path(path), 729 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 730 ), 731 TokenType.DARROW: lambda self, this, path: self.expression( 732 exp.JSONExtractScalar, 733 this=this, 734 expression=self.dialect.to_json_path(path), 735 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 736 ), 737 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 738 exp.JSONBExtract, 739 this=this, 740 expression=path, 741 ), 742 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 743 exp.JSONBExtractScalar, 744 this=this, 745 expression=path, 746 ), 747 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 748 exp.JSONBContains, 749 this=this, 750 expression=key, 751 ), 752 } 753 754 EXPRESSION_PARSERS = { 755 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 756 exp.Column: lambda self: self._parse_column(), 757 exp.Condition: lambda self: self._parse_assignment(), 758 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 759 exp.Expression: lambda self: self._parse_expression(), 760 exp.From: lambda self: self._parse_from(joins=True), 761 exp.Group: lambda self: self._parse_group(), 762 exp.Having: lambda self: self._parse_having(), 763 exp.Hint: lambda self: self._parse_hint_body(), 764 exp.Identifier: lambda self: self._parse_id_var(), 765 exp.Join: lambda self: self._parse_join(), 766 exp.Lambda: lambda self: self._parse_lambda(), 767 exp.Lateral: lambda self: self._parse_lateral(), 768 exp.Limit: lambda self: self._parse_limit(), 769 exp.Offset: lambda self: self._parse_offset(), 770 exp.Order: lambda self: self._parse_order(), 771 exp.Ordered: lambda self: self._parse_ordered(), 772 exp.Properties: lambda self: self._parse_properties(), 773 exp.Qualify: lambda self: self._parse_qualify(), 774 exp.Returning: lambda self: self._parse_returning(), 775 exp.Select: lambda self: self._parse_select(), 776 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 777 exp.Table: lambda self: self._parse_table_parts(), 778 exp.TableAlias: lambda self: self._parse_table_alias(), 779 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 780 exp.Where: lambda self: self._parse_where(), 781 exp.Window: lambda self: self._parse_named_window(), 782 exp.With: lambda self: self._parse_with(), 783 "JOIN_TYPE": lambda self: self._parse_join_parts(), 784 } 785 786 STATEMENT_PARSERS = { 787 TokenType.ALTER: lambda self: self._parse_alter(), 788 TokenType.BEGIN: lambda self: self._parse_transaction(), 789 TokenType.CACHE: lambda self: self._parse_cache(), 790 TokenType.COMMENT: lambda self: self._parse_comment(), 791 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 792 TokenType.COPY: lambda self: self._parse_copy(), 793 TokenType.CREATE: lambda self: self._parse_create(), 794 TokenType.DELETE: lambda self: self._parse_delete(), 795 TokenType.DESC: lambda self: self._parse_describe(), 796 TokenType.DESCRIBE: lambda self: self._parse_describe(), 797 TokenType.DROP: lambda self: self._parse_drop(), 798 TokenType.GRANT: lambda self: self._parse_grant(), 799 TokenType.INSERT: lambda self: self._parse_insert(), 800 TokenType.KILL: lambda self: self._parse_kill(), 801 TokenType.LOAD: lambda self: 
self._parse_load(), 802 TokenType.MERGE: lambda self: self._parse_merge(), 803 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 804 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 805 TokenType.REFRESH: lambda self: self._parse_refresh(), 806 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 807 TokenType.SET: lambda self: self._parse_set(), 808 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 809 TokenType.UNCACHE: lambda self: self._parse_uncache(), 810 TokenType.UPDATE: lambda self: self._parse_update(), 811 TokenType.USE: lambda self: self.expression( 812 exp.Use, 813 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 814 this=self._parse_table(schema=False), 815 ), 816 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 817 } 818 819 UNARY_PARSERS = { 820 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 821 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 822 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 823 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 824 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 825 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 826 } 827 828 STRING_PARSERS = { 829 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 830 exp.RawString, this=token.text 831 ), 832 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 833 exp.National, this=token.text 834 ), 835 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 836 TokenType.STRING: lambda self, token: self.expression( 837 exp.Literal, this=token.text, is_string=True 838 ), 839 TokenType.UNICODE_STRING: lambda self, token: self.expression( 840 exp.UnicodeString, 841 this=token.text, 842 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 843 ), 844 } 845 846 NUMERIC_PARSERS = { 847 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 848 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 849 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 850 TokenType.NUMBER: lambda self, token: self.expression( 851 exp.Literal, this=token.text, is_string=False 852 ), 853 } 854 855 PRIMARY_PARSERS = { 856 **STRING_PARSERS, 857 **NUMERIC_PARSERS, 858 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 859 TokenType.NULL: lambda self, _: self.expression(exp.Null), 860 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 861 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 862 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 863 TokenType.STAR: lambda self, _: self._parse_star_ops(), 864 } 865 866 PLACEHOLDER_PARSERS = { 867 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 868 TokenType.PARAMETER: lambda self: self._parse_parameter(), 869 TokenType.COLON: lambda self: ( 870 self.expression(exp.Placeholder, this=self._prev.text) 871 if self._match_set(self.ID_VAR_TOKENS) 872 else None 873 ), 874 } 875 876 RANGE_PARSERS = { 877 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 878 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 879 TokenType.GLOB: 
binary_range_parser(exp.Glob), 880 TokenType.ILIKE: binary_range_parser(exp.ILike), 881 TokenType.IN: lambda self, this: self._parse_in(this), 882 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 883 TokenType.IS: lambda self, this: self._parse_is(this), 884 TokenType.LIKE: binary_range_parser(exp.Like), 885 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 886 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 887 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 888 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 889 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 890 } 891 892 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 893 "ALLOWED_VALUES": lambda self: self.expression( 894 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 895 ), 896 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 897 "AUTO": lambda self: self._parse_auto_property(), 898 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 899 "BACKUP": lambda self: self.expression( 900 exp.BackupProperty, this=self._parse_var(any_token=True) 901 ), 902 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 903 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 904 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 905 "CHECKSUM": lambda self: self._parse_checksum(), 906 "CLUSTER BY": lambda self: self._parse_cluster(), 907 "CLUSTERED": lambda self: self._parse_clustered_by(), 908 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 909 exp.CollateProperty, **kwargs 910 ), 911 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 912 "CONTAINS": lambda self: self._parse_contains_property(), 913 "COPY": lambda self: self._parse_copy_property(), 914 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 915 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 916 "DEFINER": lambda self: self._parse_definer(), 917 "DETERMINISTIC": lambda self: self.expression( 918 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 919 ), 920 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 921 "DUPLICATE": lambda self: self._parse_duplicate(), 922 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 923 "DISTKEY": lambda self: self._parse_distkey(), 924 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 925 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 926 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 927 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 928 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 929 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 930 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 931 "FREESPACE": lambda self: self._parse_freespace(), 932 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 933 "HEAP": lambda self: self.expression(exp.HeapProperty), 934 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 935 "IMMUTABLE": lambda self: self.expression( 936 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 937 ), 938 "INHERITS": lambda self: self.expression( 939 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 940 ), 941 "INPUT": lambda self: 
self.expression(exp.InputModelProperty, this=self._parse_schema()), 942 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 943 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 944 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 945 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 946 "LIKE": lambda self: self._parse_create_like(), 947 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 948 "LOCK": lambda self: self._parse_locking(), 949 "LOCKING": lambda self: self._parse_locking(), 950 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 951 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 952 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 953 "MODIFIES": lambda self: self._parse_modifies_property(), 954 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 955 "NO": lambda self: self._parse_no_property(), 956 "ON": lambda self: self._parse_on_property(), 957 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 958 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 959 "PARTITION": lambda self: self._parse_partitioned_of(), 960 "PARTITION BY": lambda self: self._parse_partitioned_by(), 961 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 962 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 963 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 964 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 965 "READS": lambda self: self._parse_reads_property(), 966 "REMOTE": lambda self: self._parse_remote_with_connection(), 967 "RETURNS": lambda self: self._parse_returns(), 968 "STRICT": lambda self: self.expression(exp.StrictProperty), 969 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 970 "ROW": lambda self: self._parse_row(), 971 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 972 "SAMPLE": lambda self: self.expression( 973 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 974 ), 975 "SECURE": lambda self: self.expression(exp.SecureProperty), 976 "SECURITY": lambda self: self._parse_security(), 977 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 978 "SETTINGS": lambda self: self._parse_settings_property(), 979 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 980 "SORTKEY": lambda self: self._parse_sortkey(), 981 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 982 "STABLE": lambda self: self.expression( 983 exp.StabilityProperty, this=exp.Literal.string("STABLE") 984 ), 985 "STORED": lambda self: self._parse_stored(), 986 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 987 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 988 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 989 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 990 "TO": lambda self: self._parse_to_table(), 991 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 992 "TRANSFORM": lambda self: self.expression( 993 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 994 ), 995 "TTL": lambda self: self._parse_ttl(), 996 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 997 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 998 "VOLATILE": 
lambda self: self._parse_volatile_property(), 999 "WITH": lambda self: self._parse_with_property(), 1000 } 1001 1002 CONSTRAINT_PARSERS = { 1003 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1004 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1005 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1006 "CHARACTER SET": lambda self: self.expression( 1007 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1008 ), 1009 "CHECK": lambda self: self.expression( 1010 exp.CheckColumnConstraint, 1011 this=self._parse_wrapped(self._parse_assignment), 1012 enforced=self._match_text_seq("ENFORCED"), 1013 ), 1014 "COLLATE": lambda self: self.expression( 1015 exp.CollateColumnConstraint, 1016 this=self._parse_identifier() or self._parse_column(), 1017 ), 1018 "COMMENT": lambda self: self.expression( 1019 exp.CommentColumnConstraint, this=self._parse_string() 1020 ), 1021 "COMPRESS": lambda self: self._parse_compress(), 1022 "CLUSTERED": lambda self: self.expression( 1023 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1024 ), 1025 "NONCLUSTERED": lambda self: self.expression( 1026 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1027 ), 1028 "DEFAULT": lambda self: self.expression( 1029 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1030 ), 1031 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1032 "EPHEMERAL": lambda self: self.expression( 1033 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1034 ), 1035 "EXCLUDE": lambda self: self.expression( 1036 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1037 ), 1038 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1039 "FORMAT": lambda self: self.expression( 1040 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1041 ), 1042 "GENERATED": lambda self: self._parse_generated_as_identity(), 1043 "IDENTITY": lambda self: self._parse_auto_increment(), 1044 "INLINE": lambda self: self._parse_inline(), 1045 "LIKE": lambda self: self._parse_create_like(), 1046 "NOT": lambda self: self._parse_not_constraint(), 1047 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1048 "ON": lambda self: ( 1049 self._match(TokenType.UPDATE) 1050 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1051 ) 1052 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1053 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1054 "PERIOD": lambda self: self._parse_period_for_system_time(), 1055 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1056 "REFERENCES": lambda self: self._parse_references(match=False), 1057 "TITLE": lambda self: self.expression( 1058 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1059 ), 1060 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1061 "UNIQUE": lambda self: self._parse_unique(), 1062 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1063 "WATERMARK": lambda self: self.expression( 1064 exp.WatermarkColumnConstraint, 1065 this=self._match(TokenType.FOR) and self._parse_column(), 1066 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1067 ), 1068 "WITH": lambda self: self.expression( 1069 exp.Properties, expressions=self._parse_wrapped_properties() 1070 ), 1071 } 1072 1073 ALTER_PARSERS = { 1074 "ADD": 

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "SWAP": lambda self: self.expression(
            exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
        ),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
        "WATERMARK",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "NORMALIZE": lambda self: self._parse_normalize(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "OVERLAY": lambda self: self._parse_overlay(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

    EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
"NORELY"), tuple()), 1260 } 1261 1262 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1263 1264 CLONE_KEYWORDS = {"CLONE", "COPY"} 1265 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1266 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1267 1268 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1269 1270 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1271 1272 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1273 1274 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1275 1276 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1277 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1278 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1279 1280 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1281 1282 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1283 1284 ADD_CONSTRAINT_TOKENS = { 1285 TokenType.CONSTRAINT, 1286 TokenType.FOREIGN_KEY, 1287 TokenType.INDEX, 1288 TokenType.KEY, 1289 TokenType.PRIMARY_KEY, 1290 TokenType.UNIQUE, 1291 } 1292 1293 DISTINCT_TOKENS = {TokenType.DISTINCT} 1294 1295 NULL_TOKENS = {TokenType.NULL} 1296 1297 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1298 1299 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1300 1301 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1302 1303 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1304 1305 ODBC_DATETIME_LITERALS = { 1306 "d": exp.Date, 1307 "t": exp.Time, 1308 "ts": exp.Timestamp, 1309 } 1310 1311 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1312 1313 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1314 1315 # The style options for the DESCRIBE statement 1316 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1317 1318 OPERATION_MODIFIERS: t.Set[str] = set() 1319 1320 STRICT_CAST = True 1321 1322 PREFIXED_PIVOT_COLUMNS = False 1323 IDENTIFY_PIVOT_STRINGS = False 1324 1325 LOG_DEFAULTS_TO_LN = False 1326 1327 # Whether ADD is present for each column added by ALTER TABLE 1328 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1329 1330 # Whether the table sample clause expects CSV syntax 1331 TABLESAMPLE_CSV = False 1332 1333 # The default method used for table sampling 1334 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1335 1336 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1337 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1338 1339 # Whether the TRIM function expects the characters to trim as its first argument 1340 TRIM_PATTERN_FIRST = False 1341 1342 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1343 STRING_ALIASES = False 1344 1345 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1346 MODIFIERS_ATTACHED_TO_SET_OP = True 1347 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1348 1349 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1350 NO_PAREN_IF_COMMANDS = True 1351 1352 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1353 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1354 1355 # Whether the `:` operator is used to extract a value from a VARIANT column 1356 COLON_IS_VARIANT_EXTRACT = False 1357 1358 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
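
    # [Editor's note: illustrative example, not part of the sqlglot source.]
    # `parse` consumes tokens, so callers tokenize first; `Dialect.tokenize` and
    # `Dialect.parser` are the usual entry points. A sketch:
    #
    #     >>> from sqlglot.dialects.dialect import Dialect
    #     >>> dialect = Dialect.get_or_raise(None)  # the default dialect
    #     >>> sql = "SELECT 1; SELECT 2"
    #     >>> trees = dialect.parser().parse(dialect.tokenize(sql), sql)
    #     >>> len(trees)  # one syntax tree per statement
    #     2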
1454 """ 1455 errors = [] 1456 for expression_type in ensure_list(expression_types): 1457 parser = self.EXPRESSION_PARSERS.get(expression_type) 1458 if not parser: 1459 raise TypeError(f"No parser registered for {expression_type}") 1460 1461 try: 1462 return self._parse(parser, raw_tokens, sql) 1463 except ParseError as e: 1464 e.errors[0]["into_expression"] = expression_type 1465 errors.append(e) 1466 1467 raise ParseError( 1468 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1469 errors=merge_errors(errors), 1470 ) from errors[-1] 1471 1472 def _parse( 1473 self, 1474 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1475 raw_tokens: t.List[Token], 1476 sql: t.Optional[str] = None, 1477 ) -> t.List[t.Optional[exp.Expression]]: 1478 self.reset() 1479 self.sql = sql or "" 1480 1481 total = len(raw_tokens) 1482 chunks: t.List[t.List[Token]] = [[]] 1483 1484 for i, token in enumerate(raw_tokens): 1485 if token.token_type == TokenType.SEMICOLON: 1486 if token.comments: 1487 chunks.append([token]) 1488 1489 if i < total - 1: 1490 chunks.append([]) 1491 else: 1492 chunks[-1].append(token) 1493 1494 expressions = [] 1495 1496 for tokens in chunks: 1497 self._index = -1 1498 self._tokens = tokens 1499 self._advance() 1500 1501 expressions.append(parse_method(self)) 1502 1503 if self._index < len(self._tokens): 1504 self.raise_error("Invalid expression / Unexpected token") 1505 1506 self.check_errors() 1507 1508 return expressions 1509 1510 def check_errors(self) -> None: 1511 """Logs or raises any found errors, depending on the chosen error level setting.""" 1512 if self.error_level == ErrorLevel.WARN: 1513 for error in self.errors: 1514 logger.error(str(error)) 1515 elif self.error_level == ErrorLevel.RAISE and self.errors: 1516 raise ParseError( 1517 concat_messages(self.errors, self.max_errors), 1518 errors=merge_errors(self.errors), 1519 ) 1520 1521 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1522 """ 1523 Appends an error in the list of recorded errors or raises it, depending on the chosen 1524 error level setting. 1525 """ 1526 token = token or self._curr or self._prev or Token.string("") 1527 start = token.start 1528 end = token.end + 1 1529 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1530 highlight = self.sql[start:end] 1531 end_context = self.sql[end : end + self.error_message_context] 1532 1533 error = ParseError.new( 1534 f"{message}. Line {token.line}, Col: {token.col}.\n" 1535 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1536 description=message, 1537 line=token.line, 1538 col=token.col, 1539 start_context=start_context, 1540 highlight=highlight, 1541 end_context=end_context, 1542 ) 1543 1544 if self.error_level == ErrorLevel.IMMEDIATE: 1545 raise error 1546 1547 self.errors.append(error) 1548 1549 def expression( 1550 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1551 ) -> E: 1552 """ 1553 Creates a new, validated Expression. 1554 1555 Args: 1556 exp_class: The expression class to instantiate. 1557 comments: An optional list of comments to attach to the expression. 1558 kwargs: The arguments to set for the expression along with their respective values. 1559 1560 Returns: 1561 The target expression. 
1562 """ 1563 instance = exp_class(**kwargs) 1564 instance.add_comments(comments) if comments else self._add_comments(instance) 1565 return self.validate_expression(instance) 1566 1567 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1568 if expression and self._prev_comments: 1569 expression.add_comments(self._prev_comments) 1570 self._prev_comments = None 1571 1572 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1573 """ 1574 Validates an Expression, making sure that all its mandatory arguments are set. 1575 1576 Args: 1577 expression: The expression to validate. 1578 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1579 1580 Returns: 1581 The validated expression. 1582 """ 1583 if self.error_level != ErrorLevel.IGNORE: 1584 for error_message in expression.error_messages(args): 1585 self.raise_error(error_message) 1586 1587 return expression 1588 1589 def _find_sql(self, start: Token, end: Token) -> str: 1590 return self.sql[start.start : end.end + 1] 1591 1592 def _is_connected(self) -> bool: 1593 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1594 1595 def _advance(self, times: int = 1) -> None: 1596 self._index += times 1597 self._curr = seq_get(self._tokens, self._index) 1598 self._next = seq_get(self._tokens, self._index + 1) 1599 1600 if self._index > 0: 1601 self._prev = self._tokens[self._index - 1] 1602 self._prev_comments = self._prev.comments 1603 else: 1604 self._prev = None 1605 self._prev_comments = None 1606 1607 def _retreat(self, index: int) -> None: 1608 if index != self._index: 1609 self._advance(index - self._index) 1610 1611 def _warn_unsupported(self) -> None: 1612 if len(self._tokens) <= 1: 1613 return 1614 1615 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1616 # interested in emitting a warning for the one being currently processed. 1617 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1618 1619 logger.warning( 1620 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1621 ) 1622 1623 def _parse_command(self) -> exp.Command: 1624 self._warn_unsupported() 1625 return self.expression( 1626 exp.Command, 1627 comments=self._prev_comments, 1628 this=self._prev.text.upper(), 1629 expression=self._parse_string(), 1630 ) 1631 1632 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1633 """ 1634 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            comments = self._prev_comments
            stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
            stmt.add_comments(comments, prepend=True)
            return stmt

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
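
    # [Editor's note: illustrative example, not part of the sqlglot source.]
    # `_parse_statement` dispatches on the first token through STATEMENT_PARSERS
    # and falls back to expression parsing, so both of these succeed:
    #
    #     >>> from sqlglot import exp, parse_one
    #     >>> isinstance(parse_one("DROP TABLE t"), exp.Drop)
    #     True
    #     >>> isinstance(parse_one("1 + 1"), exp.Add)
    #     True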

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()
        if not kind:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        if_exists = exists or self._parse_exists()

        if kind == "COLUMN":
            this = self._parse_column()
        else:
            this = self._parse_table_parts(
                schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
            )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            exists=if_exists,
            this=this,
            expressions=expressions,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
            concurrently=concurrently,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )
        refresh = self._match_pair(TokenType.OR, TokenType.REFRESH)

        unique = self._match(TokenType.UNIQUE)

        if self._match_text_seq("CLUSTERED", "COLUMNSTORE"):
            clustered = True
        elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq(
            "COLUMNSTORE"
        ):
            clustered = False
        else:
            clustered = None

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_user_defined_function_expression()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())
                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True
            elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE):
                extend_props(self._parse_properties())

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self.expression(
                exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
            )

        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            return self._parse_as_command(start)

        create_kind_text = create_token.text.upper()
        return self.expression(
            exp.Create,
            this=this,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text,
            replace=replace,
            refresh=refresh,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
            concurrently=concurrently,
            clustered=clustered,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            self._match(TokenType.COMMA)
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        return None if self._index == index else seq

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # Only used for Teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_property)
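
    # [Editor's note: illustrative example, not part of the sqlglot source.]
    # A sketch of how the sequence options surface; the argument names
    # ("start", "increment", ...) follow the seq.set calls in
    # _parse_sequence_properties above (exact behavior may vary by version):
    #
    #     >>> from sqlglot import exp, parse_one
    #     >>> create = parse_one("CREATE SEQUENCE s START WITH 1 INCREMENT BY 2")
    #     >>> create.find(exp.SequenceProperties) is not None
    #     True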

    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return self._parse_sequence_properties()

        # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise
        if isinstance(key, exp.Column):
            key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name)

        value = self._parse_bitwise() or self._parse_var(any_token=True)

        # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier())
        if isinstance(value, exp.Column):
            value = exp.var(value.name)

        return self.expression(exp.Property, this=key, value=value)

    def _parse_stored(self) -> exp.FileFormatProperty:
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat, input_format=input_format, output_format=output_format
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
        )

    def _parse_unquoted_field(self) -> t.Optional[exp.Expression]:
        field = self._parse_field()
        if isinstance(field, exp.Identifier) and not field.quoted:
            field = exp.var(field)

        return field

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)

        return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()
            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_security(self) -> t.Optional[exp.SecurityProperty]:
        if self._match_texts(("DEFINER", "INVOKER")):
            security_specifier = self._prev.text.upper()
            return self.expression(exp.SecurityProperty, this=security_specifier)
        return None

    def _parse_settings_property(self) -> exp.SettingsProperty:
        return self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment)
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_retention_period(self) -> exp.Var:
        # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        unit = self._parse_var(any_token=True)
        return exp.var(f"{number_str}{unit}")

    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        self._match(TokenType.EQ)
        prop = self.expression(
            exp.WithSystemVersioningProperty,
            **{  # type: ignore
                "on": True,
                "with": with_,
            },
        )

        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
        self._match(TokenType.EQ)
        on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
        prop = self.expression(exp.DataDeletionProperty, on=on)

        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILTER_COLUMN", "="):
                    prop.set("filter_column", self._parse_column())
                elif self._match_text_seq("RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_distributed_property(self) -> exp.DistributedByProperty:
        kind = "HASH"
        expressions: t.Optional[t.List[exp.Expression]] = None
        if self._match_text_seq("BY", "HASH"):
            expressions = self._parse_wrapped_csv(self._parse_id_var)
        elif self._match_text_seq("BY", "RANDOM"):
            kind = "RANDOM"

        # If the BUCKETS keyword is not present, the number of buckets is AUTO
        buckets: t.Optional[exp.Expression] = None
        if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"):
            buckets = self._parse_number()

        return self.expression(
            exp.DistributedByProperty,
            expressions=expressions,
            kind=kind,
            buckets=buckets,
            order=self._parse_order(),
        )

    def _parse_duplicate(self) -> exp.DuplicateKeyProperty:
        self._match_text_seq("KEY")
        expressions = self._parse_wrapped_csv(self._parse_id_var, optional=False)
        return self.expression(exp.DuplicateKeyProperty, expressions=expressions)
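
    # [Editor's note: illustrative example, not part of the sqlglot source.]
    # The SYSTEM_VERSIONING parser above targets T-SQL temporal tables. A sketch,
    # assuming the tsql reader; exact argument shapes may vary by version:
    #
    #     >>> from sqlglot import exp, parse_one
    #     >>> create = parse_one(
    #     ...     "CREATE TABLE t (x INT) WITH (SYSTEM_VERSIONING = ON (HISTORY_TABLE = dbo.h))",
    #     ...     read="tsql",
    #     ... )
    #     >>> create.find(exp.WithSystemVersioningProperty) is not None
    #     True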

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if self._match(TokenType.SCHEMA):
            return self.expression(
                exp.WithSchemaBindingProperty,
                this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS),
            )

        if self._match_texts(self.PROCEDURE_OPTIONS, advance=False):
            return self.expression(
                exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option)
            )

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    def _parse_procedure_option(self) -> exp.Expression | None:
        if self._match_text_seq("EXECUTE", "AS"):
            return self.expression(
                exp.ExecuteAsProperty,
                this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False)
                or self._parse_string(),
            )

        return self._parse_var_from_options(self.PROCEDURE_OPTIONS)

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_assignment)
        return []
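
    # [Editor's note: illustrative summary, not part of the sqlglot source.]
    # `_parse_locking` above accepts Teradata's locking modifier, whose shape
    # (read directly off the matches above) is
    #
    #     LOCK[ING] [TABLE <name> | VIEW <name> | DATABASE <name> | ROW]
    #               [FOR | IN]
    #               [ACCESS | EXCL[USIVE] | SHARE | READ | WRITE | CHECKSUM]
    #               [OVERRIDE]
    #
    # e.g. LOCKING ROW FOR ACCESS becomes an exp.LockingProperty with
    # kind="ROW", for_or_in="FOR", lock_type="ACCESS".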

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None
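
    # [Editor's note: illustrative example, not part of the sqlglot source.]
    # `_parse_partitioned_of` above covers Postgres declarative partitioning.
    # A sketch, assuming the postgres reader:
    #
    #     >>> from sqlglot import exp, parse_one
    #     >>> create = parse_one(
    #     ...     "CREATE TABLE p1 PARTITION OF t FOR VALUES FROM (1) TO (10)",
    #     ...     read="postgres",
    #     ... )
    #     >>> create.find(exp.PartitionedOfProperty) is not None
    #     True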

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        null = None
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
            value = None
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            style = None
            self._retreat(self._index - 2)

        format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None

        if self._match_set(self.STATEMENT_PARSERS, advance=False):
            this = self._parse_statement()
        else:
            this = self._parse_table(schema=True)

        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        partition = self._parse_partition()
        return self.expression(
            exp.Describe,
            this=this,
            style=style,
            kind=kind,
            expressions=expressions,
            partition=partition,
            format=format,
        )

    def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts:
        kind = self._prev.text.upper()
        expressions = []

        def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]:
            if self._match(TokenType.WHEN):
                expression = self._parse_disjunction()
                self._match(TokenType.THEN)
            else:
                expression = None

            else_ = self._match(TokenType.ELSE)

            if not self._match(TokenType.INTO):
                return None

            return self.expression(
                exp.ConditionalInsert,
                this=self.expression(
                    exp.Insert,
                    this=self._parse_table(schema=True),
                    expression=self._parse_derived_table_values(),
                ),
                expression=expression,
                else_=else_,
            )

        expression = parse_conditional_insert()
        while expression is not None:
            expressions.append(expression)
            expression = parse_conditional_insert()

        return self.expression(
            exp.MultitableInserts,
            kind=kind,
            comments=comments,
            expressions=expressions,
            source=self._parse_table(),
        )

    def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]:
        comments = []
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match_set((TokenType.FIRST, TokenType.ALL)):
                comments += ensure_list(self._prev_comments)
                return self._parse_multitable_inserts(comments)

            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
            partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(),
            settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
            source=self._match(TokenType.TABLE) and self._parse_table(),
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )
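
    # [Editor's note: illustrative example, not part of the sqlglot source.]
    # Multitable inserts cover Oracle's INSERT ALL / INSERT FIRST. A sketch,
    # assuming the oracle reader:
    #
    #     >>> from sqlglot import exp, parse_one
    #     >>> stmt = parse_one(
    #     ...     "INSERT ALL INTO t1 VALUES (1) INTO t2 VALUES (2) SELECT * FROM src",
    #     ...     read="oracle",
    #     ... )
    #     >>> isinstance(stmt, exp.MultitableInserts)
    #     True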
expressions = None 2780 2781 return self.expression( 2782 exp.OnConflict, 2783 duplicate=duplicate, 2784 expressions=expressions, 2785 action=action, 2786 conflict_keys=conflict_keys, 2787 constraint=constraint, 2788 ) 2789 2790 def _parse_returning(self) -> t.Optional[exp.Returning]: 2791 if not self._match(TokenType.RETURNING): 2792 return None 2793 return self.expression( 2794 exp.Returning, 2795 expressions=self._parse_csv(self._parse_expression), 2796 into=self._match(TokenType.INTO) and self._parse_table_part(), 2797 ) 2798 2799 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2800 if not self._match(TokenType.FORMAT): 2801 return None 2802 return self._parse_row_format() 2803 2804 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2805 index = self._index 2806 with_ = with_ or self._match_text_seq("WITH") 2807 2808 if not self._match(TokenType.SERDE_PROPERTIES): 2809 self._retreat(index) 2810 return None 2811 return self.expression( 2812 exp.SerdeProperties, 2813 **{ # type: ignore 2814 "expressions": self._parse_wrapped_properties(), 2815 "with": with_, 2816 }, 2817 ) 2818 2819 def _parse_row_format( 2820 self, match_row: bool = False 2821 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2822 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2823 return None 2824 2825 if self._match_text_seq("SERDE"): 2826 this = self._parse_string() 2827 2828 serde_properties = self._parse_serde_properties() 2829 2830 return self.expression( 2831 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2832 ) 2833 2834 self._match_text_seq("DELIMITED") 2835 2836 kwargs = {} 2837 2838 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2839 kwargs["fields"] = self._parse_string() 2840 if self._match_text_seq("ESCAPED", "BY"): 2841 kwargs["escaped"] = self._parse_string() 2842 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2843 kwargs["collection_items"] = self._parse_string() 2844 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2845 kwargs["map_keys"] = self._parse_string() 2846 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2847 kwargs["lines"] = self._parse_string() 2848 if self._match_text_seq("NULL", "DEFINED", "AS"): 2849 kwargs["null"] = self._parse_string() 2850 2851 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2852 2853 def _parse_load(self) -> exp.LoadData | exp.Command: 2854 if self._match_text_seq("DATA"): 2855 local = self._match_text_seq("LOCAL") 2856 self._match_text_seq("INPATH") 2857 inpath = self._parse_string() 2858 overwrite = self._match(TokenType.OVERWRITE) 2859 self._match_pair(TokenType.INTO, TokenType.TABLE) 2860 2861 return self.expression( 2862 exp.LoadData, 2863 this=self._parse_table(schema=True), 2864 local=local, 2865 overwrite=overwrite, 2866 inpath=inpath, 2867 partition=self._parse_partition(), 2868 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2869 serde=self._match_text_seq("SERDE") and self._parse_string(), 2870 ) 2871 return self._parse_as_command(self._prev) 2872 2873 def _parse_delete(self) -> exp.Delete: 2874 # This handles MySQL's "Multiple-Table Syntax" 2875 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2876 tables = None 2877 if not self._match(TokenType.FROM, advance=False): 2878 tables = self._parse_csv(self._parse_table) or None 2879 2880 returning = self._parse_returning() 2881 2882 
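        # Illustrative doctest (editor's addition, not part of the original source;
        # the exact AST layout may vary across sqlglot versions). It exercises the
        # multi-table branch above, where the deleted tables precede FROM:
        #
        #   >>> import sqlglot
        #   >>> e = sqlglot.parse_one("DELETE t1 FROM t1 JOIN t2 ON t1.id = t2.id", read="mysql")
        #   >>> [t.name for t in e.args["tables"]]
        #   ['t1']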
return self.expression( 2883 exp.Delete, 2884 tables=tables, 2885 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2886 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2887 cluster=self._match(TokenType.ON) and self._parse_on_property(), 2888 where=self._parse_where(), 2889 returning=returning or self._parse_returning(), 2890 limit=self._parse_limit(), 2891 ) 2892 2893 def _parse_update(self) -> exp.Update: 2894 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2895 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2896 returning = self._parse_returning() 2897 return self.expression( 2898 exp.Update, 2899 **{ # type: ignore 2900 "this": this, 2901 "expressions": expressions, 2902 "from": self._parse_from(joins=True), 2903 "where": self._parse_where(), 2904 "returning": returning or self._parse_returning(), 2905 "order": self._parse_order(), 2906 "limit": self._parse_limit(), 2907 }, 2908 ) 2909 2910 def _parse_uncache(self) -> exp.Uncache: 2911 if not self._match(TokenType.TABLE): 2912 self.raise_error("Expecting TABLE after UNCACHE") 2913 2914 return self.expression( 2915 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2916 ) 2917 2918 def _parse_cache(self) -> exp.Cache: 2919 lazy = self._match_text_seq("LAZY") 2920 self._match(TokenType.TABLE) 2921 table = self._parse_table(schema=True) 2922 2923 options = [] 2924 if self._match_text_seq("OPTIONS"): 2925 self._match_l_paren() 2926 k = self._parse_string() 2927 self._match(TokenType.EQ) 2928 v = self._parse_string() 2929 options = [k, v] 2930 self._match_r_paren() 2931 2932 self._match(TokenType.ALIAS) 2933 return self.expression( 2934 exp.Cache, 2935 this=table, 2936 lazy=lazy, 2937 options=options, 2938 expression=self._parse_select(nested=True), 2939 ) 2940 2941 def _parse_partition(self) -> t.Optional[exp.Partition]: 2942 if not self._match(TokenType.PARTITION): 2943 return None 2944 2945 return self.expression( 2946 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2947 ) 2948 2949 def _parse_value(self) -> t.Optional[exp.Tuple]: 2950 def _parse_value_expression() -> t.Optional[exp.Expression]: 2951 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 2952 return exp.var(self._prev.text.upper()) 2953 return self._parse_expression() 2954 2955 if self._match(TokenType.L_PAREN): 2956 expressions = self._parse_csv(_parse_value_expression) 2957 self._match_r_paren() 2958 return self.expression(exp.Tuple, expressions=expressions) 2959 2960 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
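        # Editor's elaboration (added): e.g. VALUES 1, 2 yields two one-column rows,
        # roughly Tuple(expressions=[1]) and Tuple(expressions=[2]), whereas
        # VALUES (1, 2) is a single two-column row handled by the L_PAREN branch above.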
2961 expression = self._parse_expression() 2962 if expression: 2963 return self.expression(exp.Tuple, expressions=[expression]) 2964 return None 2965 2966 def _parse_projections(self) -> t.List[exp.Expression]: 2967 return self._parse_expressions() 2968 2969 def _parse_select( 2970 self, 2971 nested: bool = False, 2972 table: bool = False, 2973 parse_subquery_alias: bool = True, 2974 parse_set_operation: bool = True, 2975 ) -> t.Optional[exp.Expression]: 2976 cte = self._parse_with() 2977 2978 if cte: 2979 this = self._parse_statement() 2980 2981 if not this: 2982 self.raise_error("Failed to parse any statement following CTE") 2983 return cte 2984 2985 if "with" in this.arg_types: 2986 this.set("with", cte) 2987 else: 2988 self.raise_error(f"{this.key} does not support CTE") 2989 this = cte 2990 2991 return this 2992 2993 # duckdb supports leading with FROM x 2994 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2995 2996 if self._match(TokenType.SELECT): 2997 comments = self._prev_comments 2998 2999 hint = self._parse_hint() 3000 3001 if self._next and not self._next.token_type == TokenType.DOT: 3002 all_ = self._match(TokenType.ALL) 3003 distinct = self._match_set(self.DISTINCT_TOKENS) 3004 else: 3005 all_, distinct = None, None 3006 3007 kind = ( 3008 self._match(TokenType.ALIAS) 3009 and self._match_texts(("STRUCT", "VALUE")) 3010 and self._prev.text.upper() 3011 ) 3012 3013 if distinct: 3014 distinct = self.expression( 3015 exp.Distinct, 3016 on=self._parse_value() if self._match(TokenType.ON) else None, 3017 ) 3018 3019 if all_ and distinct: 3020 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3021 3022 operation_modifiers = [] 3023 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3024 operation_modifiers.append(exp.var(self._prev.text.upper())) 3025 3026 limit = self._parse_limit(top=True) 3027 projections = self._parse_projections() 3028 3029 this = self.expression( 3030 exp.Select, 3031 kind=kind, 3032 hint=hint, 3033 distinct=distinct, 3034 expressions=projections, 3035 limit=limit, 3036 operation_modifiers=operation_modifiers or None, 3037 ) 3038 this.comments = comments 3039 3040 into = self._parse_into() 3041 if into: 3042 this.set("into", into) 3043 3044 if not from_: 3045 from_ = self._parse_from() 3046 3047 if from_: 3048 this.set("from", from_) 3049 3050 this = self._parse_query_modifiers(this) 3051 elif (table or nested) and self._match(TokenType.L_PAREN): 3052 if self._match(TokenType.PIVOT): 3053 this = self._parse_simplified_pivot() 3054 elif self._match(TokenType.FROM): 3055 this = exp.select("*").from_( 3056 t.cast(exp.From, self._parse_from(skip_from_token=True)) 3057 ) 3058 else: 3059 this = ( 3060 self._parse_table() 3061 if table 3062 else self._parse_select(nested=True, parse_set_operation=False) 3063 ) 3064 3065 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3066 # in case a modifier (e.g. 
join) is following 3067 if table and isinstance(this, exp.Values) and this.alias: 3068 alias = this.args["alias"].pop() 3069 this = exp.Table(this=this, alias=alias) 3070 3071 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3072 3073 self._match_r_paren() 3074 3075 # We return early here so that the UNION isn't attached to the subquery by the 3076 # following call to _parse_set_operations, but instead becomes the parent node 3077 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3078 elif self._match(TokenType.VALUES, advance=False): 3079 this = self._parse_derived_table_values() 3080 elif from_: 3081 this = exp.select("*").from_(from_.this, copy=False) 3082 elif self._match(TokenType.SUMMARIZE): 3083 table = self._match(TokenType.TABLE) 3084 this = self._parse_select() or self._parse_string() or self._parse_table() 3085 return self.expression(exp.Summarize, this=this, table=table) 3086 elif self._match(TokenType.DESCRIBE): 3087 this = self._parse_describe() 3088 elif self._match_text_seq("STREAM"): 3089 this = self._parse_function() 3090 if this: 3091 this = self.expression(exp.Stream, this=this) 3092 else: 3093 self._retreat(self._index - 1) 3094 else: 3095 this = None 3096 3097 return self._parse_set_operations(this) if parse_set_operation else this 3098 3099 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3100 if not skip_with_token and not self._match(TokenType.WITH): 3101 return None 3102 3103 comments = self._prev_comments 3104 recursive = self._match(TokenType.RECURSIVE) 3105 3106 last_comments = None 3107 expressions = [] 3108 while True: 3109 expressions.append(self._parse_cte()) 3110 if last_comments: 3111 expressions[-1].add_comments(last_comments) 3112 3113 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3114 break 3115 else: 3116 self._match(TokenType.WITH) 3117 3118 last_comments = self._prev_comments 3119 3120 return self.expression( 3121 exp.With, comments=comments, expressions=expressions, recursive=recursive 3122 ) 3123 3124 def _parse_cte(self) -> exp.CTE: 3125 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3126 if not alias or not alias.this: 3127 self.raise_error("Expected CTE to have alias") 3128 3129 self._match(TokenType.ALIAS) 3130 comments = self._prev_comments 3131 3132 if self._match_text_seq("NOT", "MATERIALIZED"): 3133 materialized = False 3134 elif self._match_text_seq("MATERIALIZED"): 3135 materialized = True 3136 else: 3137 materialized = None 3138 3139 return self.expression( 3140 exp.CTE, 3141 this=self._parse_wrapped(self._parse_statement), 3142 alias=alias, 3143 materialized=materialized, 3144 comments=comments, 3145 ) 3146 3147 def _parse_table_alias( 3148 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3149 ) -> t.Optional[exp.TableAlias]: 3150 any_token = self._match(TokenType.ALIAS) 3151 alias = ( 3152 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3153 or self._parse_string_as_identifier() 3154 ) 3155 3156 index = self._index 3157 if self._match(TokenType.L_PAREN): 3158 columns = self._parse_csv(self._parse_function_parameter) 3159 self._match_r_paren() if columns else self._retreat(index) 3160 else: 3161 columns = None 3162 3163 if not alias and not columns: 3164 return None 3165 3166 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3167 3168 # We bubble up comments from the Identifier to the TableAlias 3169 if isinstance(alias, exp.Identifier): 3170 
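            # Editor's note (added): pop_comments() detaches the comments from the
            # Identifier so they are attached exactly once, on the TableAlias,
            # instead of being rendered on both nodes.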
table_alias.add_comments(alias.pop_comments()) 3171 3172 return table_alias 3173 3174 def _parse_subquery( 3175 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3176 ) -> t.Optional[exp.Subquery]: 3177 if not this: 3178 return None 3179 3180 return self.expression( 3181 exp.Subquery, 3182 this=this, 3183 pivots=self._parse_pivots(), 3184 alias=self._parse_table_alias() if parse_alias else None, 3185 sample=self._parse_table_sample(), 3186 ) 3187 3188 def _implicit_unnests_to_explicit(self, this: E) -> E: 3189 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3190 3191 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3192 for i, join in enumerate(this.args.get("joins") or []): 3193 table = join.this 3194 normalized_table = table.copy() 3195 normalized_table.meta["maybe_column"] = True 3196 normalized_table = _norm(normalized_table, dialect=self.dialect) 3197 3198 if isinstance(table, exp.Table) and not join.args.get("on"): 3199 if normalized_table.parts[0].name in refs: 3200 table_as_column = table.to_column() 3201 unnest = exp.Unnest(expressions=[table_as_column]) 3202 3203 # Table.to_column creates a parent Alias node that we want to convert to 3204 # a TableAlias and attach to the Unnest, so it matches the parser's output 3205 if isinstance(table.args.get("alias"), exp.TableAlias): 3206 table_as_column.replace(table_as_column.this) 3207 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3208 3209 table.replace(unnest) 3210 3211 refs.add(normalized_table.alias_or_name) 3212 3213 return this 3214 3215 def _parse_query_modifiers( 3216 self, this: t.Optional[exp.Expression] 3217 ) -> t.Optional[exp.Expression]: 3218 if isinstance(this, (exp.Query, exp.Table)): 3219 for join in self._parse_joins(): 3220 this.append("joins", join) 3221 for lateral in iter(self._parse_lateral, None): 3222 this.append("laterals", lateral) 3223 3224 while True: 3225 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3226 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3227 key, expression = parser(self) 3228 3229 if expression: 3230 this.set(key, expression) 3231 if key == "limit": 3232 offset = expression.args.pop("offset", None) 3233 3234 if offset: 3235 offset = exp.Offset(expression=offset) 3236 this.set("offset", offset) 3237 3238 limit_by_expressions = expression.expressions 3239 expression.set("expressions", None) 3240 offset.set("expressions", limit_by_expressions) 3241 continue 3242 break 3243 3244 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3245 this = self._implicit_unnests_to_explicit(this) 3246 3247 return this 3248 3249 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3250 start = self._curr 3251 while self._curr: 3252 self._advance() 3253 3254 end = self._tokens[self._index - 1] 3255 return exp.Hint(expressions=[self._find_sql(start, end)]) 3256 3257 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3258 return self._parse_function_call() 3259 3260 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3261 start_index = self._index 3262 should_fallback_to_string = False 3263 3264 hints = [] 3265 try: 3266 for hint in iter( 3267 lambda: self._parse_csv( 3268 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3269 ), 3270 [], 3271 ): 3272 hints.extend(hint) 3273 except ParseError: 3274 should_fallback_to_string = True 3275 3276 if should_fallback_to_string or self._curr: 3277 
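            # Editor's note (added): reaching this branch means either a ParseError
            # occurred above or unconsumed tokens remain (self._curr is truthy), so
            # the structured hint parse is abandoned and the raw hint text is
            # preserved verbatim instead.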
self._retreat(start_index) 3278 return self._parse_hint_fallback_to_string() 3279 3280 return self.expression(exp.Hint, expressions=hints) 3281 3282 def _parse_hint(self) -> t.Optional[exp.Hint]: 3283 if self._match(TokenType.HINT) and self._prev_comments: 3284 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3285 3286 return None 3287 3288 def _parse_into(self) -> t.Optional[exp.Into]: 3289 if not self._match(TokenType.INTO): 3290 return None 3291 3292 temp = self._match(TokenType.TEMPORARY) 3293 unlogged = self._match_text_seq("UNLOGGED") 3294 self._match(TokenType.TABLE) 3295 3296 return self.expression( 3297 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3298 ) 3299 3300 def _parse_from( 3301 self, joins: bool = False, skip_from_token: bool = False 3302 ) -> t.Optional[exp.From]: 3303 if not skip_from_token and not self._match(TokenType.FROM): 3304 return None 3305 3306 return self.expression( 3307 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3308 ) 3309 3310 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3311 return self.expression( 3312 exp.MatchRecognizeMeasure, 3313 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3314 this=self._parse_expression(), 3315 ) 3316 3317 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3318 if not self._match(TokenType.MATCH_RECOGNIZE): 3319 return None 3320 3321 self._match_l_paren() 3322 3323 partition = self._parse_partition_by() 3324 order = self._parse_order() 3325 3326 measures = ( 3327 self._parse_csv(self._parse_match_recognize_measure) 3328 if self._match_text_seq("MEASURES") 3329 else None 3330 ) 3331 3332 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3333 rows = exp.var("ONE ROW PER MATCH") 3334 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3335 text = "ALL ROWS PER MATCH" 3336 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3337 text += " SHOW EMPTY MATCHES" 3338 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3339 text += " OMIT EMPTY MATCHES" 3340 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3341 text += " WITH UNMATCHED ROWS" 3342 rows = exp.var(text) 3343 else: 3344 rows = None 3345 3346 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3347 text = "AFTER MATCH SKIP" 3348 if self._match_text_seq("PAST", "LAST", "ROW"): 3349 text += " PAST LAST ROW" 3350 elif self._match_text_seq("TO", "NEXT", "ROW"): 3351 text += " TO NEXT ROW" 3352 elif self._match_text_seq("TO", "FIRST"): 3353 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3354 elif self._match_text_seq("TO", "LAST"): 3355 text += f" TO LAST {self._advance_any().text}" # type: ignore 3356 after = exp.var(text) 3357 else: 3358 after = None 3359 3360 if self._match_text_seq("PATTERN"): 3361 self._match_l_paren() 3362 3363 if not self._curr: 3364 self.raise_error("Expecting )", self._curr) 3365 3366 paren = 1 3367 start = self._curr 3368 3369 while self._curr and paren > 0: 3370 if self._curr.token_type == TokenType.L_PAREN: 3371 paren += 1 3372 if self._curr.token_type == TokenType.R_PAREN: 3373 paren -= 1 3374 3375 end = self._prev 3376 self._advance() 3377 3378 if paren > 0: 3379 self.raise_error("Expecting )", self._curr) 3380 3381 pattern = exp.var(self._find_sql(start, end)) 3382 else: 3383 pattern = None 3384 3385 define = ( 3386 self._parse_csv(self._parse_name_as_expression) 3387 if self._match_text_seq("DEFINE") 3388 else None 3389 ) 3390 3391 
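        # Illustrative input shape (editor's addition, not part of the original
        # source) showing the clauses this method has just consumed, in
        # Snowflake/Oracle-style syntax:
        #
        #   SELECT * FROM t MATCH_RECOGNIZE (
        #     PARTITION BY id ORDER BY ts
        #     MEASURES FINAL COUNT(*) AS cnt
        #     ALL ROWS PER MATCH OMIT EMPTY MATCHES
        #     AFTER MATCH SKIP PAST LAST ROW
        #     PATTERN (A B+)
        #     DEFINE B AS price > 10
        #   ) AS mr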
self._match_r_paren() 3392 3393 return self.expression( 3394 exp.MatchRecognize, 3395 partition_by=partition, 3396 order=order, 3397 measures=measures, 3398 rows=rows, 3399 after=after, 3400 pattern=pattern, 3401 define=define, 3402 alias=self._parse_table_alias(), 3403 ) 3404 3405 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3406 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3407 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3408 cross_apply = False 3409 3410 if cross_apply is not None: 3411 this = self._parse_select(table=True) 3412 view = None 3413 outer = None 3414 elif self._match(TokenType.LATERAL): 3415 this = self._parse_select(table=True) 3416 view = self._match(TokenType.VIEW) 3417 outer = self._match(TokenType.OUTER) 3418 else: 3419 return None 3420 3421 if not this: 3422 this = ( 3423 self._parse_unnest() 3424 or self._parse_function() 3425 or self._parse_id_var(any_token=False) 3426 ) 3427 3428 while self._match(TokenType.DOT): 3429 this = exp.Dot( 3430 this=this, 3431 expression=self._parse_function() or self._parse_id_var(any_token=False), 3432 ) 3433 3434 if view: 3435 table = self._parse_id_var(any_token=False) 3436 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3437 table_alias: t.Optional[exp.TableAlias] = self.expression( 3438 exp.TableAlias, this=table, columns=columns 3439 ) 3440 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3441 # We move the alias from the lateral's child node to the lateral itself 3442 table_alias = this.args["alias"].pop() 3443 else: 3444 table_alias = self._parse_table_alias() 3445 3446 return self.expression( 3447 exp.Lateral, 3448 this=this, 3449 view=view, 3450 outer=outer, 3451 alias=table_alias, 3452 cross_apply=cross_apply, 3453 ) 3454 3455 def _parse_join_parts( 3456 self, 3457 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3458 return ( 3459 self._match_set(self.JOIN_METHODS) and self._prev, 3460 self._match_set(self.JOIN_SIDES) and self._prev, 3461 self._match_set(self.JOIN_KINDS) and self._prev, 3462 ) 3463 3464 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3465 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3466 this = self._parse_column() 3467 if isinstance(this, exp.Column): 3468 return this.this 3469 return this 3470 3471 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3472 3473 def _parse_join( 3474 self, skip_join_token: bool = False, parse_bracket: bool = False 3475 ) -> t.Optional[exp.Join]: 3476 if self._match(TokenType.COMMA): 3477 return self.expression(exp.Join, this=self._parse_table()) 3478 3479 index = self._index 3480 method, side, kind = self._parse_join_parts() 3481 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3482 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3483 3484 if not skip_join_token and not join: 3485 self._retreat(index) 3486 kind = None 3487 method = None 3488 side = None 3489 3490 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3491 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3492 3493 if not skip_join_token and not join and not outer_apply and not cross_apply: 3494 return None 3495 3496 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3497 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3498 kwargs["expressions"] = 
self._parse_csv( 3499 lambda: self._parse_table(parse_bracket=parse_bracket) 3500 ) 3501 3502 if method: 3503 kwargs["method"] = method.text 3504 if side: 3505 kwargs["side"] = side.text 3506 if kind: 3507 kwargs["kind"] = kind.text 3508 if hint: 3509 kwargs["hint"] = hint 3510 3511 if self._match(TokenType.MATCH_CONDITION): 3512 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3513 3514 if self._match(TokenType.ON): 3515 kwargs["on"] = self._parse_assignment() 3516 elif self._match(TokenType.USING): 3517 kwargs["using"] = self._parse_using_identifiers() 3518 elif ( 3519 not (outer_apply or cross_apply) 3520 and not isinstance(kwargs["this"], exp.Unnest) 3521 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3522 ): 3523 index = self._index 3524 joins: t.Optional[list] = list(self._parse_joins()) 3525 3526 if joins and self._match(TokenType.ON): 3527 kwargs["on"] = self._parse_assignment() 3528 elif joins and self._match(TokenType.USING): 3529 kwargs["using"] = self._parse_using_identifiers() 3530 else: 3531 joins = None 3532 self._retreat(index) 3533 3534 kwargs["this"].set("joins", joins if joins else None) 3535 3536 comments = [c for token in (method, side, kind) if token for c in token.comments] 3537 return self.expression(exp.Join, comments=comments, **kwargs) 3538 3539 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3540 this = self._parse_assignment() 3541 3542 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3543 return this 3544 3545 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3546 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3547 3548 return this 3549 3550 def _parse_index_params(self) -> exp.IndexParameters: 3551 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3552 3553 if self._match(TokenType.L_PAREN, advance=False): 3554 columns = self._parse_wrapped_csv(self._parse_with_operator) 3555 else: 3556 columns = None 3557 3558 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3559 partition_by = self._parse_partition_by() 3560 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3561 tablespace = ( 3562 self._parse_var(any_token=True) 3563 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3564 else None 3565 ) 3566 where = self._parse_where() 3567 3568 on = self._parse_field() if self._match(TokenType.ON) else None 3569 3570 return self.expression( 3571 exp.IndexParameters, 3572 using=using, 3573 columns=columns, 3574 include=include, 3575 partition_by=partition_by, 3576 where=where, 3577 with_storage=with_storage, 3578 tablespace=tablespace, 3579 on=on, 3580 ) 3581 3582 def _parse_index( 3583 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3584 ) -> t.Optional[exp.Index]: 3585 if index or anonymous: 3586 unique = None 3587 primary = None 3588 amp = None 3589 3590 self._match(TokenType.ON) 3591 self._match(TokenType.TABLE) # hive 3592 table = self._parse_table_parts(schema=True) 3593 else: 3594 unique = self._match(TokenType.UNIQUE) 3595 primary = self._match_text_seq("PRIMARY") 3596 amp = self._match_text_seq("AMP") 3597 3598 if not self._match(TokenType.INDEX): 3599 return None 3600 3601 index = self._parse_id_var() 3602 table = None 3603 3604 params = self._parse_index_params() 3605 3606 return self.expression( 3607 exp.Index, 3608 this=index, 3609 table=table, 3610 unique=unique, 3611 primary=primary, 3612 amp=amp, 3613 
params=params, 3614 ) 3615 3616 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3617 hints: t.List[exp.Expression] = [] 3618 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3619 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3620 hints.append( 3621 self.expression( 3622 exp.WithTableHint, 3623 expressions=self._parse_csv( 3624 lambda: self._parse_function() or self._parse_var(any_token=True) 3625 ), 3626 ) 3627 ) 3628 self._match_r_paren() 3629 else: 3630 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3631 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3632 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3633 3634 self._match_set((TokenType.INDEX, TokenType.KEY)) 3635 if self._match(TokenType.FOR): 3636 hint.set("target", self._advance_any() and self._prev.text.upper()) 3637 3638 hint.set("expressions", self._parse_wrapped_id_vars()) 3639 hints.append(hint) 3640 3641 return hints or None 3642 3643 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3644 return ( 3645 (not schema and self._parse_function(optional_parens=False)) 3646 or self._parse_id_var(any_token=False) 3647 or self._parse_string_as_identifier() 3648 or self._parse_placeholder() 3649 ) 3650 3651 def _parse_table_parts( 3652 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3653 ) -> exp.Table: 3654 catalog = None 3655 db = None 3656 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3657 3658 while self._match(TokenType.DOT): 3659 if catalog: 3660 # This allows nesting the table in arbitrarily many dot expressions if needed 3661 table = self.expression( 3662 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3663 ) 3664 else: 3665 catalog = db 3666 db = table 3667 # "" used for tsql FROM a..b case 3668 table = self._parse_table_part(schema=schema) or "" 3669 3670 if ( 3671 wildcard 3672 and self._is_connected() 3673 and (isinstance(table, exp.Identifier) or not table) 3674 and self._match(TokenType.STAR) 3675 ): 3676 if isinstance(table, exp.Identifier): 3677 table.args["this"] += "*" 3678 else: 3679 table = exp.Identifier(this="*") 3680 3681 # We bubble up comments from the Identifier to the Table 3682 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3683 3684 if is_db_reference: 3685 catalog = db 3686 db = table 3687 table = None 3688 3689 if not table and not is_db_reference: 3690 self.raise_error(f"Expected table name but got {self._curr}") 3691 if not db and is_db_reference: 3692 self.raise_error(f"Expected database name but got {self._curr}") 3693 3694 table = self.expression( 3695 exp.Table, 3696 comments=comments, 3697 this=table, 3698 db=db, 3699 catalog=catalog, 3700 ) 3701 3702 changes = self._parse_changes() 3703 if changes: 3704 table.set("changes", changes) 3705 3706 at_before = self._parse_historical_data() 3707 if at_before: 3708 table.set("when", at_before) 3709 3710 pivots = self._parse_pivots() 3711 if pivots: 3712 table.set("pivots", pivots) 3713 3714 return table 3715 3716 def _parse_table( 3717 self, 3718 schema: bool = False, 3719 joins: bool = False, 3720 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3721 parse_bracket: bool = False, 3722 is_db_reference: bool = False, 3723 parse_partition: bool = False, 3724 ) -> t.Optional[exp.Expression]: 3725 lateral = self._parse_lateral() 3726 if lateral: 3727 return lateral 3728 3729 unnest = 
self._parse_unnest() 3730 if unnest: 3731 return unnest 3732 3733 values = self._parse_derived_table_values() 3734 if values: 3735 return values 3736 3737 subquery = self._parse_select(table=True) 3738 if subquery: 3739 if not subquery.args.get("pivots"): 3740 subquery.set("pivots", self._parse_pivots()) 3741 return subquery 3742 3743 bracket = parse_bracket and self._parse_bracket(None) 3744 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3745 3746 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3747 self._parse_table 3748 ) 3749 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3750 3751 only = self._match(TokenType.ONLY) 3752 3753 this = t.cast( 3754 exp.Expression, 3755 bracket 3756 or rows_from 3757 or self._parse_bracket( 3758 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3759 ), 3760 ) 3761 3762 if only: 3763 this.set("only", only) 3764 3765 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3766 self._match_text_seq("*") 3767 3768 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3769 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3770 this.set("partition", self._parse_partition()) 3771 3772 if schema: 3773 return self._parse_schema(this=this) 3774 3775 version = self._parse_version() 3776 3777 if version: 3778 this.set("version", version) 3779 3780 if self.dialect.ALIAS_POST_TABLESAMPLE: 3781 this.set("sample", self._parse_table_sample()) 3782 3783 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3784 if alias: 3785 this.set("alias", alias) 3786 3787 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3788 return self.expression( 3789 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3790 ) 3791 3792 this.set("hints", self._parse_table_hints()) 3793 3794 if not this.args.get("pivots"): 3795 this.set("pivots", self._parse_pivots()) 3796 3797 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3798 this.set("sample", self._parse_table_sample()) 3799 3800 if joins: 3801 for join in self._parse_joins(): 3802 this.append("joins", join) 3803 3804 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3805 this.set("ordinality", True) 3806 this.set("alias", self._parse_table_alias()) 3807 3808 return this 3809 3810 def _parse_version(self) -> t.Optional[exp.Version]: 3811 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3812 this = "TIMESTAMP" 3813 elif self._match(TokenType.VERSION_SNAPSHOT): 3814 this = "VERSION" 3815 else: 3816 return None 3817 3818 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3819 kind = self._prev.text.upper() 3820 start = self._parse_bitwise() 3821 self._match_texts(("TO", "AND")) 3822 end = self._parse_bitwise() 3823 expression: t.Optional[exp.Expression] = self.expression( 3824 exp.Tuple, expressions=[start, end] 3825 ) 3826 elif self._match_text_seq("CONTAINED", "IN"): 3827 kind = "CONTAINED IN" 3828 expression = self.expression( 3829 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3830 ) 3831 elif self._match(TokenType.ALL): 3832 kind = "ALL" 3833 expression = None 3834 else: 3835 self._match_text_seq("AS", "OF") 3836 kind = "AS OF" 3837 expression = self._parse_type() 3838 3839 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3840 3841 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3842 # 
https://docs.snowflake.com/en/sql-reference/constructs/at-before 3843 index = self._index 3844 historical_data = None 3845 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3846 this = self._prev.text.upper() 3847 kind = ( 3848 self._match(TokenType.L_PAREN) 3849 and self._match_texts(self.HISTORICAL_DATA_KIND) 3850 and self._prev.text.upper() 3851 ) 3852 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3853 3854 if expression: 3855 self._match_r_paren() 3856 historical_data = self.expression( 3857 exp.HistoricalData, this=this, kind=kind, expression=expression 3858 ) 3859 else: 3860 self._retreat(index) 3861 3862 return historical_data 3863 3864 def _parse_changes(self) -> t.Optional[exp.Changes]: 3865 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 3866 return None 3867 3868 information = self._parse_var(any_token=True) 3869 self._match_r_paren() 3870 3871 return self.expression( 3872 exp.Changes, 3873 information=information, 3874 at_before=self._parse_historical_data(), 3875 end=self._parse_historical_data(), 3876 ) 3877 3878 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3879 if not self._match(TokenType.UNNEST): 3880 return None 3881 3882 expressions = self._parse_wrapped_csv(self._parse_equality) 3883 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3884 3885 alias = self._parse_table_alias() if with_alias else None 3886 3887 if alias: 3888 if self.dialect.UNNEST_COLUMN_ONLY: 3889 if alias.args.get("columns"): 3890 self.raise_error("Unexpected extra column alias in unnest.") 3891 3892 alias.set("columns", [alias.this]) 3893 alias.set("this", None) 3894 3895 columns = alias.args.get("columns") or [] 3896 if offset and len(expressions) < len(columns): 3897 offset = columns.pop() 3898 3899 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3900 self._match(TokenType.ALIAS) 3901 offset = self._parse_id_var( 3902 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3903 ) or exp.to_identifier("offset") 3904 3905 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3906 3907 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3908 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3909 if not is_derived and not ( 3910 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 3911 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 3912 ): 3913 return None 3914 3915 expressions = self._parse_csv(self._parse_value) 3916 alias = self._parse_table_alias() 3917 3918 if is_derived: 3919 self._match_r_paren() 3920 3921 return self.expression( 3922 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3923 ) 3924 3925 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3926 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3927 as_modifier and self._match_text_seq("USING", "SAMPLE") 3928 ): 3929 return None 3930 3931 bucket_numerator = None 3932 bucket_denominator = None 3933 bucket_field = None 3934 percent = None 3935 size = None 3936 seed = None 3937 3938 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3939 matched_l_paren = self._match(TokenType.L_PAREN) 3940 3941 if self.TABLESAMPLE_CSV: 3942 num = None 3943 expressions = self._parse_csv(self._parse_primary) 3944 else: 3945 expressions = None 3946 num = ( 3947 self._parse_factor() 3948 if self._match(TokenType.NUMBER, advance=False) 3949 else self._parse_primary() or 
self._parse_placeholder() 3950 ) 3951 3952 if self._match_text_seq("BUCKET"): 3953 bucket_numerator = self._parse_number() 3954 self._match_text_seq("OUT", "OF") 3955 bucket_denominator = self._parse_number() 3956 self._match(TokenType.ON) 3957 bucket_field = self._parse_field() 3958 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3959 percent = num 3960 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3961 size = num 3962 else: 3963 percent = num 3964 3965 if matched_l_paren: 3966 self._match_r_paren() 3967 3968 if self._match(TokenType.L_PAREN): 3969 method = self._parse_var(upper=True) 3970 seed = self._match(TokenType.COMMA) and self._parse_number() 3971 self._match_r_paren() 3972 elif self._match_texts(("SEED", "REPEATABLE")): 3973 seed = self._parse_wrapped(self._parse_number) 3974 3975 if not method and self.DEFAULT_SAMPLING_METHOD: 3976 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3977 3978 return self.expression( 3979 exp.TableSample, 3980 expressions=expressions, 3981 method=method, 3982 bucket_numerator=bucket_numerator, 3983 bucket_denominator=bucket_denominator, 3984 bucket_field=bucket_field, 3985 percent=percent, 3986 size=size, 3987 seed=seed, 3988 ) 3989 3990 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3991 return list(iter(self._parse_pivot, None)) or None 3992 3993 def _parse_joins(self) -> t.Iterator[exp.Join]: 3994 return iter(self._parse_join, None) 3995 3996 # https://duckdb.org/docs/sql/statements/pivot 3997 def _parse_simplified_pivot(self) -> exp.Pivot: 3998 def _parse_on() -> t.Optional[exp.Expression]: 3999 this = self._parse_bitwise() 4000 return self._parse_in(this) if self._match(TokenType.IN) else this 4001 4002 this = self._parse_table() 4003 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4004 using = self._match(TokenType.USING) and self._parse_csv( 4005 lambda: self._parse_alias(self._parse_function()) 4006 ) 4007 group = self._parse_group() 4008 return self.expression( 4009 exp.Pivot, this=this, expressions=expressions, using=using, group=group 4010 ) 4011 4012 def _parse_pivot_in(self) -> exp.In | exp.PivotAny: 4013 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4014 this = self._parse_select_or_expression() 4015 4016 self._match(TokenType.ALIAS) 4017 alias = self._parse_bitwise() 4018 if alias: 4019 if isinstance(alias, exp.Column) and not alias.db: 4020 alias = alias.this 4021 return self.expression(exp.PivotAlias, this=this, alias=alias) 4022 4023 return this 4024 4025 value = self._parse_column() 4026 4027 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4028 self.raise_error("Expecting IN (") 4029 4030 if self._match(TokenType.ANY): 4031 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4032 else: 4033 exprs = self._parse_csv(_parse_aliased_expression) 4034 4035 self._match_r_paren() 4036 return self.expression(exp.In, this=value, expressions=exprs) 4037 4038 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4039 index = self._index 4040 include_nulls = None 4041 4042 if self._match(TokenType.PIVOT): 4043 unpivot = False 4044 elif self._match(TokenType.UNPIVOT): 4045 unpivot = True 4046 4047 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4048 if self._match_text_seq("INCLUDE", "NULLS"): 4049 include_nulls = True 4050 elif self._match_text_seq("EXCLUDE", "NULLS"): 4051 include_nulls = False 4052 else: 4053 return None 4054 4055 expressions
= [] 4056 4057 if not self._match(TokenType.L_PAREN): 4058 self._retreat(index) 4059 return None 4060 4061 if unpivot: 4062 expressions = self._parse_csv(self._parse_column) 4063 else: 4064 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 4065 4066 if not expressions: 4067 self.raise_error("Failed to parse PIVOT's aggregation list") 4068 4069 if not self._match(TokenType.FOR): 4070 self.raise_error("Expecting FOR") 4071 4072 field = self._parse_pivot_in() 4073 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4074 self._parse_bitwise 4075 ) 4076 4077 self._match_r_paren() 4078 4079 pivot = self.expression( 4080 exp.Pivot, 4081 expressions=expressions, 4082 field=field, 4083 unpivot=unpivot, 4084 include_nulls=include_nulls, 4085 default_on_null=default_on_null, 4086 ) 4087 4088 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4089 pivot.set("alias", self._parse_table_alias()) 4090 4091 if not unpivot: 4092 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4093 4094 columns: t.List[exp.Expression] = [] 4095 for fld in pivot.args["field"].expressions: 4096 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4097 for name in names: 4098 if self.PREFIXED_PIVOT_COLUMNS: 4099 name = f"{name}_{field_name}" if name else field_name 4100 else: 4101 name = f"{field_name}_{name}" if name else field_name 4102 4103 columns.append(exp.to_identifier(name)) 4104 4105 pivot.set("columns", columns) 4106 4107 return pivot 4108 4109 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4110 return [agg.alias for agg in aggregations] 4111 4112 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4113 if not skip_where_token and not self._match(TokenType.PREWHERE): 4114 return None 4115 4116 return self.expression( 4117 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4118 ) 4119 4120 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4121 if not skip_where_token and not self._match(TokenType.WHERE): 4122 return None 4123 4124 return self.expression( 4125 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4126 ) 4127 4128 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4129 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4130 return None 4131 4132 elements: t.Dict[str, t.Any] = defaultdict(list) 4133 4134 if self._match(TokenType.ALL): 4135 elements["all"] = True 4136 elif self._match(TokenType.DISTINCT): 4137 elements["all"] = False 4138 4139 while True: 4140 index = self._index 4141 4142 elements["expressions"].extend( 4143 self._parse_csv( 4144 lambda: None 4145 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4146 else self._parse_assignment() 4147 ) 4148 ) 4149 4150 before_with_index = self._index 4151 with_prefix = self._match(TokenType.WITH) 4152 4153 if self._match(TokenType.ROLLUP): 4154 elements["rollup"].append( 4155 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4156 ) 4157 elif self._match(TokenType.CUBE): 4158 elements["cube"].append( 4159 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4160 ) 4161 elif self._match(TokenType.GROUPING_SETS): 4162 elements["grouping_sets"].append( 4163 self.expression( 4164 exp.GroupingSets, 4165 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4166 ) 4167 ) 
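            # Editor's note (added): the branch below handles ClickHouse's
            # GROUP BY ... WITH TOTALS; the preceding WITH keyword was already
            # consumed above as with_prefix.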
4168 elif self._match_text_seq("TOTALS"): 4169 elements["totals"] = True # type: ignore 4170 4171 if before_with_index <= self._index <= before_with_index + 1: 4172 self._retreat(before_with_index) 4173 break 4174 4175 if index == self._index: 4176 break 4177 4178 return self.expression(exp.Group, **elements) # type: ignore 4179 4180 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4181 return self.expression( 4182 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4183 ) 4184 4185 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4186 if self._match(TokenType.L_PAREN): 4187 grouping_set = self._parse_csv(self._parse_column) 4188 self._match_r_paren() 4189 return self.expression(exp.Tuple, expressions=grouping_set) 4190 4191 return self._parse_column() 4192 4193 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4194 if not skip_having_token and not self._match(TokenType.HAVING): 4195 return None 4196 return self.expression(exp.Having, this=self._parse_assignment()) 4197 4198 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4199 if not self._match(TokenType.QUALIFY): 4200 return None 4201 return self.expression(exp.Qualify, this=self._parse_assignment()) 4202 4203 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4204 if skip_start_token: 4205 start = None 4206 elif self._match(TokenType.START_WITH): 4207 start = self._parse_assignment() 4208 else: 4209 return None 4210 4211 self._match(TokenType.CONNECT_BY) 4212 nocycle = self._match_text_seq("NOCYCLE") 4213 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4214 exp.Prior, this=self._parse_bitwise() 4215 ) 4216 connect = self._parse_assignment() 4217 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4218 4219 if not start and self._match(TokenType.START_WITH): 4220 start = self._parse_assignment() 4221 4222 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4223 4224 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4225 this = self._parse_id_var(any_token=True) 4226 if self._match(TokenType.ALIAS): 4227 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4228 return this 4229 4230 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4231 if self._match_text_seq("INTERPOLATE"): 4232 return self._parse_wrapped_csv(self._parse_name_as_expression) 4233 return None 4234 4235 def _parse_order( 4236 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4237 ) -> t.Optional[exp.Expression]: 4238 siblings = None 4239 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4240 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4241 return this 4242 4243 siblings = True 4244 4245 return self.expression( 4246 exp.Order, 4247 this=this, 4248 expressions=self._parse_csv(self._parse_ordered), 4249 siblings=siblings, 4250 ) 4251 4252 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4253 if not self._match(token): 4254 return None 4255 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4256 4257 def _parse_ordered( 4258 self, parse_method: t.Optional[t.Callable] = None 4259 ) -> t.Optional[exp.Ordered]: 4260 this = parse_method() if parse_method else self._parse_assignment() 4261 if not this: 4262 return None 4263 4264 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4265 this = 
exp.var("ALL") 4266 4267 asc = self._match(TokenType.ASC) 4268 desc = self._match(TokenType.DESC) or (asc and False) 4269 4270 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4271 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4272 4273 nulls_first = is_nulls_first or False 4274 explicitly_null_ordered = is_nulls_first or is_nulls_last 4275 4276 if ( 4277 not explicitly_null_ordered 4278 and ( 4279 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4280 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4281 ) 4282 and self.dialect.NULL_ORDERING != "nulls_are_last" 4283 ): 4284 nulls_first = True 4285 4286 if self._match_text_seq("WITH", "FILL"): 4287 with_fill = self.expression( 4288 exp.WithFill, 4289 **{ # type: ignore 4290 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4291 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4292 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4293 "interpolate": self._parse_interpolate(), 4294 }, 4295 ) 4296 else: 4297 with_fill = None 4298 4299 return self.expression( 4300 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4301 ) 4302 4303 def _parse_limit( 4304 self, 4305 this: t.Optional[exp.Expression] = None, 4306 top: bool = False, 4307 skip_limit_token: bool = False, 4308 ) -> t.Optional[exp.Expression]: 4309 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4310 comments = self._prev_comments 4311 if top: 4312 limit_paren = self._match(TokenType.L_PAREN) 4313 expression = self._parse_term() if limit_paren else self._parse_number() 4314 4315 if limit_paren: 4316 self._match_r_paren() 4317 else: 4318 expression = self._parse_term() 4319 4320 if self._match(TokenType.COMMA): 4321 offset = expression 4322 expression = self._parse_term() 4323 else: 4324 offset = None 4325 4326 limit_exp = self.expression( 4327 exp.Limit, 4328 this=this, 4329 expression=expression, 4330 offset=offset, 4331 comments=comments, 4332 expressions=self._parse_limit_by(), 4333 ) 4334 4335 return limit_exp 4336 4337 if self._match(TokenType.FETCH): 4338 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4339 direction = self._prev.text.upper() if direction else "FIRST" 4340 4341 count = self._parse_field(tokens=self.FETCH_TOKENS) 4342 percent = self._match(TokenType.PERCENT) 4343 4344 self._match_set((TokenType.ROW, TokenType.ROWS)) 4345 4346 only = self._match_text_seq("ONLY") 4347 with_ties = self._match_text_seq("WITH", "TIES") 4348 4349 if only and with_ties: 4350 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 4351 4352 return self.expression( 4353 exp.Fetch, 4354 direction=direction, 4355 count=count, 4356 percent=percent, 4357 with_ties=with_ties, 4358 ) 4359 4360 return this 4361 4362 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4363 if not self._match(TokenType.OFFSET): 4364 return this 4365 4366 count = self._parse_term() 4367 self._match_set((TokenType.ROW, TokenType.ROWS)) 4368 4369 return self.expression( 4370 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4371 ) 4372 4373 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4374 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4375 4376 def _parse_locks(self) -> t.List[exp.Lock]: 4377 locks = [] 4378 while True: 4379 if self._match_text_seq("FOR", "UPDATE"): 4380 update = True 4381 elif self._match_text_seq("FOR", "SHARE") 
or self._match_text_seq( 4382 "LOCK", "IN", "SHARE", "MODE" 4383 ): 4384 update = False 4385 else: 4386 break 4387 4388 expressions = None 4389 if self._match_text_seq("OF"): 4390 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4391 4392 wait: t.Optional[bool | exp.Expression] = None 4393 if self._match_text_seq("NOWAIT"): 4394 wait = True 4395 elif self._match_text_seq("WAIT"): 4396 wait = self._parse_primary() 4397 elif self._match_text_seq("SKIP", "LOCKED"): 4398 wait = False 4399 4400 locks.append( 4401 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4402 ) 4403 4404 return locks 4405 4406 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4407 while this and self._match_set(self.SET_OPERATIONS): 4408 token_type = self._prev.token_type 4409 4410 if token_type == TokenType.UNION: 4411 operation: t.Type[exp.SetOperation] = exp.Union 4412 elif token_type == TokenType.EXCEPT: 4413 operation = exp.Except 4414 else: 4415 operation = exp.Intersect 4416 4417 comments = self._prev.comments 4418 4419 if self._match(TokenType.DISTINCT): 4420 distinct: t.Optional[bool] = True 4421 elif self._match(TokenType.ALL): 4422 distinct = False 4423 else: 4424 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4425 if distinct is None: 4426 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4427 4428 by_name = self._match_text_seq("BY", "NAME") 4429 expression = self._parse_select(nested=True, parse_set_operation=False) 4430 4431 this = self.expression( 4432 operation, 4433 comments=comments, 4434 this=this, 4435 distinct=distinct, 4436 by_name=by_name, 4437 expression=expression, 4438 ) 4439 4440 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4441 expression = this.expression 4442 4443 if expression: 4444 for arg in self.SET_OP_MODIFIERS: 4445 expr = expression.args.get(arg) 4446 if expr: 4447 this.set(arg, expr.pop()) 4448 4449 return this 4450 4451 def _parse_expression(self) -> t.Optional[exp.Expression]: 4452 return self._parse_alias(self._parse_assignment()) 4453 4454 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4455 this = self._parse_disjunction() 4456 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4457 # This allows us to parse <non-identifier token> := <expr> 4458 this = exp.column( 4459 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4460 ) 4461 4462 while self._match_set(self.ASSIGNMENT): 4463 if isinstance(this, exp.Column) and len(this.parts) == 1: 4464 this = this.this 4465 4466 this = self.expression( 4467 self.ASSIGNMENT[self._prev.token_type], 4468 this=this, 4469 comments=self._prev_comments, 4470 expression=self._parse_assignment(), 4471 ) 4472 4473 return this 4474 4475 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4476 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4477 4478 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4479 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4480 4481 def _parse_equality(self) -> t.Optional[exp.Expression]: 4482 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4483 4484 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4485 return self._parse_tokens(self._parse_range, self.COMPARISON) 4486 4487 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4488 this = this or self._parse_bitwise() 4489 negate = 
self._match(TokenType.NOT) 4490 4491 if self._match_set(self.RANGE_PARSERS): 4492 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4493 if not expression: 4494 return this 4495 4496 this = expression 4497 elif self._match(TokenType.ISNULL): 4498 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4499 4500 # Postgres supports ISNULL and NOTNULL for conditions. 4501 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4502 if self._match(TokenType.NOTNULL): 4503 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4504 this = self.expression(exp.Not, this=this) 4505 4506 if negate: 4507 this = self._negate_range(this) 4508 4509 if self._match(TokenType.IS): 4510 this = self._parse_is(this) 4511 4512 return this 4513 4514 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4515 if not this: 4516 return this 4517 4518 return self.expression(exp.Not, this=this) 4519 4520 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4521 index = self._index - 1 4522 negate = self._match(TokenType.NOT) 4523 4524 if self._match_text_seq("DISTINCT", "FROM"): 4525 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4526 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4527 4528 if self._match(TokenType.JSON): 4529 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4530 4531 if self._match_text_seq("WITH"): 4532 _with = True 4533 elif self._match_text_seq("WITHOUT"): 4534 _with = False 4535 else: 4536 _with = None 4537 4538 unique = self._match(TokenType.UNIQUE) 4539 self._match_text_seq("KEYS") 4540 expression: t.Optional[exp.Expression] = self.expression( 4541 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4542 ) 4543 else: 4544 expression = self._parse_primary() or self._parse_null() 4545 if not expression: 4546 self._retreat(index) 4547 return None 4548 4549 this = self.expression(exp.Is, this=this, expression=expression) 4550 return self.expression(exp.Not, this=this) if negate else this 4551 4552 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4553 unnest = self._parse_unnest(with_alias=False) 4554 if unnest: 4555 this = self.expression(exp.In, this=this, unnest=unnest) 4556 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4557 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4558 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4559 4560 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4561 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4562 else: 4563 this = self.expression(exp.In, this=this, expressions=expressions) 4564 4565 if matched_l_paren: 4566 self._match_r_paren(this) 4567 elif not self._match(TokenType.R_BRACKET, expression=this): 4568 self.raise_error("Expecting ]") 4569 else: 4570 this = self.expression(exp.In, this=this, field=self._parse_column()) 4571 4572 return this 4573 4574 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4575 low = self._parse_bitwise() 4576 self._match(TokenType.AND) 4577 high = self._parse_bitwise() 4578 return self.expression(exp.Between, this=this, low=low, high=high) 4579 4580 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4581 if not self._match(TokenType.ESCAPE): 4582 return this 4583 return self.expression(exp.Escape, this=this, 
expression=self._parse_string()) 4584 4585 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4586 index = self._index 4587 4588 if not self._match(TokenType.INTERVAL) and match_interval: 4589 return None 4590 4591 if self._match(TokenType.STRING, advance=False): 4592 this = self._parse_primary() 4593 else: 4594 this = self._parse_term() 4595 4596 if not this or ( 4597 isinstance(this, exp.Column) 4598 and not this.table 4599 and not this.this.quoted 4600 and this.name.upper() == "IS" 4601 ): 4602 self._retreat(index) 4603 return None 4604 4605 unit = self._parse_function() or ( 4606 not self._match(TokenType.ALIAS, advance=False) 4607 and self._parse_var(any_token=True, upper=True) 4608 ) 4609 4610 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4611 # each INTERVAL expression into this canonical form so it's easy to transpile 4612 if this and this.is_number: 4613 this = exp.Literal.string(this.to_py()) 4614 elif this and this.is_string: 4615 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4616 if len(parts) == 1: 4617 if unit: 4618 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4619 self._retreat(self._index - 1) 4620 4621 this = exp.Literal.string(parts[0][0]) 4622 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4623 4624 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4625 unit = self.expression( 4626 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4627 ) 4628 4629 interval = self.expression(exp.Interval, this=this, unit=unit) 4630 4631 index = self._index 4632 self._match(TokenType.PLUS) 4633 4634 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4635 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4636 return self.expression( 4637 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4638 ) 4639 4640 self._retreat(index) 4641 return interval 4642 4643 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4644 this = self._parse_term() 4645 4646 while True: 4647 if self._match_set(self.BITWISE): 4648 this = self.expression( 4649 self.BITWISE[self._prev.token_type], 4650 this=this, 4651 expression=self._parse_term(), 4652 ) 4653 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4654 this = self.expression( 4655 exp.DPipe, 4656 this=this, 4657 expression=self._parse_term(), 4658 safe=not self.dialect.STRICT_STRING_CONCAT, 4659 ) 4660 elif self._match(TokenType.DQMARK): 4661 this = self.expression( 4662 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4663 ) 4664 elif self._match_pair(TokenType.LT, TokenType.LT): 4665 this = self.expression( 4666 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4667 ) 4668 elif self._match_pair(TokenType.GT, TokenType.GT): 4669 this = self.expression( 4670 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4671 ) 4672 else: 4673 break 4674 4675 return this 4676 4677 def _parse_term(self) -> t.Optional[exp.Expression]: 4678 this = self._parse_factor() 4679 4680 while self._match_set(self.TERM): 4681 klass = self.TERM[self._prev.token_type] 4682 comments = self._prev_comments 4683 expression = self._parse_factor() 4684 4685 this = self.expression(klass, this=this, comments=comments, expression=expression) 4686 4687 if isinstance(this, exp.Collate): 4688 expr = this.expression 4689 4690 # Preserve collations such as pg_catalog."default" 
(Postgres) as columns, otherwise 4691 # fall back to Identifier / Var 4692 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4693 ident = expr.this 4694 if isinstance(ident, exp.Identifier): 4695 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4696 4697 return this 4698 4699 def _parse_factor(self) -> t.Optional[exp.Expression]: 4700 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4701 this = parse_method() 4702 4703 while self._match_set(self.FACTOR): 4704 klass = self.FACTOR[self._prev.token_type] 4705 comments = self._prev_comments 4706 expression = parse_method() 4707 4708 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4709 self._retreat(self._index - 1) 4710 return this 4711 4712 this = self.expression(klass, this=this, comments=comments, expression=expression) 4713 4714 if isinstance(this, exp.Div): 4715 this.args["typed"] = self.dialect.TYPED_DIVISION 4716 this.args["safe"] = self.dialect.SAFE_DIVISION 4717 4718 return this 4719 4720 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4721 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4722 4723 def _parse_unary(self) -> t.Optional[exp.Expression]: 4724 if self._match_set(self.UNARY_PARSERS): 4725 return self.UNARY_PARSERS[self._prev.token_type](self) 4726 return self._parse_at_time_zone(self._parse_type()) 4727 4728 def _parse_type( 4729 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4730 ) -> t.Optional[exp.Expression]: 4731 interval = parse_interval and self._parse_interval() 4732 if interval: 4733 return interval 4734 4735 index = self._index 4736 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4737 4738 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 4739 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 4740 if isinstance(data_type, exp.Cast): 4741 # This constructor can contain ops directly after it, for instance struct unnesting: 4742 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 4743 return self._parse_column_ops(data_type) 4744 4745 if data_type: 4746 index2 = self._index 4747 this = self._parse_primary() 4748 4749 if isinstance(this, exp.Literal): 4750 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4751 if parser: 4752 return parser(self, this, data_type) 4753 4754 return self.expression(exp.Cast, this=this, to=data_type) 4755 4756 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4757 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4758 # 4759 # If the index difference here is greater than 1, that means the parser itself must have 4760 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4761 # 4762 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4763 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4764 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4765 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 4766 # 4767 # In these cases, we don't really want to return the converted type, but instead retreat 4768 # and try to parse a Column or Identifier in the section below.
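# Illustrative worked example (not part of the original source) of the index
# arithmetic described above: for DECIMAL(38, 0) the token stream is
# DECIMAL ( 38 , 0 ), so parsing the type consumes six tokens and
# index2 - index == 6 > 1, i.e. the precision and scale really came from the input.
# For a bare DECIMAL that a TYPE_CONVERTERS callable expanded, only the type
# keyword was consumed, so index2 - index == 1 and we retreat below.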
4769 if data_type.expressions and index2 - index > 1: 4770 self._retreat(index2) 4771 return self._parse_column_ops(data_type) 4772 4773 self._retreat(index) 4774 4775 if fallback_to_identifier: 4776 return self._parse_id_var() 4777 4778 this = self._parse_column() 4779 return this and self._parse_column_ops(this) 4780 4781 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4782 this = self._parse_type() 4783 if not this: 4784 return None 4785 4786 if isinstance(this, exp.Column) and not this.table: 4787 this = exp.var(this.name.upper()) 4788 4789 return self.expression( 4790 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4791 ) 4792 4793 def _parse_types( 4794 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4795 ) -> t.Optional[exp.Expression]: 4796 index = self._index 4797 4798 this: t.Optional[exp.Expression] = None 4799 prefix = self._match_text_seq("SYSUDTLIB", ".") 4800 4801 if not self._match_set(self.TYPE_TOKENS): 4802 identifier = allow_identifiers and self._parse_id_var( 4803 any_token=False, tokens=(TokenType.VAR,) 4804 ) 4805 if isinstance(identifier, exp.Identifier): 4806 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4807 4808 if len(tokens) != 1: 4809 self.raise_error("Unexpected identifier", self._prev) 4810 4811 if tokens[0].token_type in self.TYPE_TOKENS: 4812 self._prev = tokens[0] 4813 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4814 type_name = identifier.name 4815 4816 while self._match(TokenType.DOT): 4817 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4818 4819 this = exp.DataType.build(type_name, udt=True) 4820 else: 4821 self._retreat(self._index - 1) 4822 return None 4823 else: 4824 return None 4825 4826 type_token = self._prev.token_type 4827 4828 if type_token == TokenType.PSEUDO_TYPE: 4829 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4830 4831 if type_token == TokenType.OBJECT_IDENTIFIER: 4832 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4833 4834 # https://materialize.com/docs/sql/types/map/ 4835 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4836 key_type = self._parse_types( 4837 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4838 ) 4839 if not self._match(TokenType.FARROW): 4840 self._retreat(index) 4841 return None 4842 4843 value_type = self._parse_types( 4844 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4845 ) 4846 if not self._match(TokenType.R_BRACKET): 4847 self._retreat(index) 4848 return None 4849 4850 return exp.DataType( 4851 this=exp.DataType.Type.MAP, 4852 expressions=[key_type, value_type], 4853 nested=True, 4854 prefix=prefix, 4855 ) 4856 4857 nested = type_token in self.NESTED_TYPE_TOKENS 4858 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4859 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4860 expressions = None 4861 maybe_func = False 4862 4863 if self._match(TokenType.L_PAREN): 4864 if is_struct: 4865 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4866 elif nested: 4867 expressions = self._parse_csv( 4868 lambda: self._parse_types( 4869 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4870 ) 4871 ) 4872 if type_token == TokenType.NULLABLE and len(expressions) == 1: 4873 this = expressions[0] 4874 this.set("nullable", True) 4875 self._match_r_paren() 4876 return this 4877 elif type_token in self.ENUM_TYPE_TOKENS: 4878 
expressions = self._parse_csv(self._parse_equality) 4879 elif is_aggregate: 4880 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4881 any_token=False, tokens=(TokenType.VAR,) 4882 ) 4883 if not func_or_ident or not self._match(TokenType.COMMA): 4884 return None 4885 expressions = self._parse_csv( 4886 lambda: self._parse_types( 4887 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4888 ) 4889 ) 4890 expressions.insert(0, func_or_ident) 4891 else: 4892 expressions = self._parse_csv(self._parse_type_size) 4893 4894 # https://docs.snowflake.com/en/sql-reference/data-types-vector 4895 if type_token == TokenType.VECTOR and len(expressions) == 2: 4896 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 4897 4898 if not expressions or not self._match(TokenType.R_PAREN): 4899 self._retreat(index) 4900 return None 4901 4902 maybe_func = True 4903 4904 values: t.Optional[t.List[exp.Expression]] = None 4905 4906 if nested and self._match(TokenType.LT): 4907 if is_struct: 4908 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4909 else: 4910 expressions = self._parse_csv( 4911 lambda: self._parse_types( 4912 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4913 ) 4914 ) 4915 4916 if not self._match(TokenType.GT): 4917 self.raise_error("Expecting >") 4918 4919 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4920 values = self._parse_csv(self._parse_assignment) 4921 if not values and is_struct: 4922 values = None 4923 self._retreat(self._index - 1) 4924 else: 4925 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4926 4927 if type_token in self.TIMESTAMPS: 4928 if self._match_text_seq("WITH", "TIME", "ZONE"): 4929 maybe_func = False 4930 tz_type = ( 4931 exp.DataType.Type.TIMETZ 4932 if type_token in self.TIMES 4933 else exp.DataType.Type.TIMESTAMPTZ 4934 ) 4935 this = exp.DataType(this=tz_type, expressions=expressions) 4936 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4937 maybe_func = False 4938 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4939 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4940 maybe_func = False 4941 elif type_token == TokenType.INTERVAL: 4942 unit = self._parse_var(upper=True) 4943 if unit: 4944 if self._match_text_seq("TO"): 4945 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4946 4947 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4948 else: 4949 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4950 4951 if maybe_func and check_func: 4952 index2 = self._index 4953 peek = self._parse_string() 4954 4955 if not peek: 4956 self._retreat(index) 4957 return None 4958 4959 self._retreat(index2) 4960 4961 if not this: 4962 if self._match_text_seq("UNSIGNED"): 4963 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4964 if not unsigned_type_token: 4965 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4966 4967 type_token = unsigned_type_token or type_token 4968 4969 this = exp.DataType( 4970 this=exp.DataType.Type[type_token.value], 4971 expressions=expressions, 4972 nested=nested, 4973 prefix=prefix, 4974 ) 4975 4976 # Empty arrays/structs are allowed 4977 if values is not None: 4978 cls = exp.Struct if is_struct else exp.Array 4979 this = exp.cast(cls(expressions=values), this, copy=False) 4980 4981 elif expressions: 4982 this.set("expressions", 
expressions) 4983 4984 # https://materialize.com/docs/sql/types/list/#type-name 4985 while self._match(TokenType.LIST): 4986 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4987 4988 index = self._index 4989 4990 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4991 matched_array = self._match(TokenType.ARRAY) 4992 4993 while self._curr: 4994 datatype_token = self._prev.token_type 4995 matched_l_bracket = self._match(TokenType.L_BRACKET) 4996 if not matched_l_bracket and not matched_array: 4997 break 4998 4999 matched_array = False 5000 values = self._parse_csv(self._parse_assignment) or None 5001 if ( 5002 values 5003 and not schema 5004 and ( 5005 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5006 ) 5007 ): 5008 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5009 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5010 self._retreat(index) 5011 break 5012 5013 this = exp.DataType( 5014 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5015 ) 5016 self._match(TokenType.R_BRACKET) 5017 5018 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5019 converter = self.TYPE_CONVERTERS.get(this.this) 5020 if converter: 5021 this = converter(t.cast(exp.DataType, this)) 5022 5023 return this 5024 5025 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5026 index = self._index 5027 5028 if ( 5029 self._curr 5030 and self._next 5031 and self._curr.token_type in self.TYPE_TOKENS 5032 and self._next.token_type in self.TYPE_TOKENS 5033 ): 5034 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5035 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5036 this = self._parse_id_var() 5037 else: 5038 this = ( 5039 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5040 or self._parse_id_var() 5041 ) 5042 5043 self._match(TokenType.COLON) 5044 5045 if ( 5046 type_required 5047 and not isinstance(this, exp.DataType) 5048 and not self._match_set(self.TYPE_TOKENS, advance=False) 5049 ): 5050 self._retreat(index) 5051 return self._parse_types() 5052 5053 return self._parse_column_def(this) 5054 5055 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5056 if not self._match_text_seq("AT", "TIME", "ZONE"): 5057 return this 5058 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5059 5060 def _parse_column(self) -> t.Optional[exp.Expression]: 5061 this = self._parse_column_reference() 5062 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5063 5064 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5065 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5066 5067 return column 5068 5069 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5070 this = self._parse_field() 5071 if ( 5072 not this 5073 and self._match(TokenType.VALUES, advance=False) 5074 and self.VALUES_FOLLOWED_BY_PAREN 5075 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5076 ): 5077 this = self._parse_id_var() 5078 5079 if isinstance(this, exp.Identifier): 5080 # We bubble up comments from the Identifier to the Column 5081 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5082 5083 return this 5084 5085 def _parse_colon_as_variant_extract( 5086 self, this: t.Optional[exp.Expression] 5087 ) -> t.Optional[exp.Expression]: 5088 casts = [] 5089 json_path = [] 5090 escape = None 5091 5092 while self._match(TokenType.COLON): 5093 start_index = self._index 5094 5095 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5096 path = self._parse_column_ops( 5097 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5098 ) 5099 5100 # The cast :: operator has a lower precedence than the extraction operator :, so 5101 # we rearrange the AST appropriately to avoid casting the JSON path 5102 while isinstance(path, exp.Cast): 5103 casts.append(path.to) 5104 path = path.this 5105 5106 if casts: 5107 dcolon_offset = next( 5108 i 5109 for i, t in enumerate(self._tokens[start_index:]) 5110 if t.token_type == TokenType.DCOLON 5111 ) 5112 end_token = self._tokens[start_index + dcolon_offset - 1] 5113 else: 5114 end_token = self._prev 5115 5116 if path: 5117 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5118 # it'll roundtrip to a string literal in GET_PATH 5119 if isinstance(path, exp.Identifier) and path.quoted: 5120 escape = True 5121 5122 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5123 5124 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5125 # Databricks transforms it back to the colon/dot notation 5126 if json_path: 5127 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5128 5129 if json_path_expr: 5130 json_path_expr.set("escape", escape) 5131 5132 this = self.expression( 5133 exp.JSONExtract, 5134 this=this, 5135 expression=json_path_expr, 5136 variant_extract=True, 5137 ) 5138 5139 while casts: 5140 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5141 5142 return this 5143 5144 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5145 return self._parse_types() 5146 5147 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5148 this = self._parse_bracket(this) 5149 5150 while self._match_set(self.COLUMN_OPERATORS): 5151 op_token = self._prev.token_type 5152 op = self.COLUMN_OPERATORS.get(op_token) 5153 5154 if op_token == TokenType.DCOLON: 5155 field = self._parse_dcolon() 5156 if not field: 5157 self.raise_error("Expected type") 5158 elif op and self._curr: 5159 field = self._parse_column_reference() or self._parse_bracket() 5160 else: 5161 field = self._parse_field(any_token=True, anonymous_func=True) 5162 5163 if isinstance(field, (exp.Func, exp.Window)) and this: 5164 # BQ & snowflake allow function calls like x.y.count(...), SAFE.SUBSTR(...) etc 5165 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5166 this = exp.replace_tree( 5167 this, 5168 lambda n: ( 5169 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5170 if n.table 5171 else n.this 5172 ) 5173 if isinstance(n, exp.Column) 5174 else n, 5175 ) 5176 5177 if op: 5178 this = op(self, this, field) 5179 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5180 this = self.expression( 5181 exp.Column, 5182 comments=this.comments, 5183 this=field, 5184 table=this.this, 5185 db=this.args.get("table"), 5186 catalog=this.args.get("db"), 5187 ) 5188 elif isinstance(field, exp.Window): 5189 # Move the exp.Dot's to the window's function 5190 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5191 field.set("this", window_func) 5192 this = field 5193 else: 5194 this = self.expression(exp.Dot, this=this, expression=field) 5195 5196 if field and field.comments: 5197 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5198 5199 this = self._parse_bracket(this) 5200 5201 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5202 5203 def _parse_primary(self) -> t.Optional[exp.Expression]: 5204 if self._match_set(self.PRIMARY_PARSERS): 5205 token_type = self._prev.token_type 5206 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5207 5208 if token_type == TokenType.STRING: 5209 expressions = [primary] 5210 while self._match(TokenType.STRING): 5211 expressions.append(exp.Literal.string(self._prev.text)) 5212 5213 if len(expressions) > 1: 5214 return self.expression(exp.Concat, expressions=expressions) 5215 5216 return primary 5217 5218 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5219 return exp.Literal.number(f"0.{self._prev.text}") 5220 5221 if 
self._match(TokenType.L_PAREN): 5222 comments = self._prev_comments 5223 query = self._parse_select() 5224 5225 if query: 5226 expressions = [query] 5227 else: 5228 expressions = self._parse_expressions() 5229 5230 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5231 5232 if not this and self._match(TokenType.R_PAREN, advance=False): 5233 this = self.expression(exp.Tuple) 5234 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5235 this = self._parse_subquery(this=this, parse_alias=False) 5236 elif isinstance(this, exp.Subquery): 5237 this = self._parse_subquery( 5238 this=self._parse_set_operations(this), parse_alias=False 5239 ) 5240 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5241 this = self.expression(exp.Tuple, expressions=expressions) 5242 else: 5243 this = self.expression(exp.Paren, this=this) 5244 5245 if this: 5246 this.add_comments(comments) 5247 5248 self._match_r_paren(expression=this) 5249 return this 5250 5251 return None 5252 5253 def _parse_field( 5254 self, 5255 any_token: bool = False, 5256 tokens: t.Optional[t.Collection[TokenType]] = None, 5257 anonymous_func: bool = False, 5258 ) -> t.Optional[exp.Expression]: 5259 if anonymous_func: 5260 field = ( 5261 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5262 or self._parse_primary() 5263 ) 5264 else: 5265 field = self._parse_primary() or self._parse_function( 5266 anonymous=anonymous_func, any_token=any_token 5267 ) 5268 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5269 5270 def _parse_function( 5271 self, 5272 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5273 anonymous: bool = False, 5274 optional_parens: bool = True, 5275 any_token: bool = False, 5276 ) -> t.Optional[exp.Expression]: 5277 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5278 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5279 fn_syntax = False 5280 if ( 5281 self._match(TokenType.L_BRACE, advance=False) 5282 and self._next 5283 and self._next.text.upper() == "FN" 5284 ): 5285 self._advance(2) 5286 fn_syntax = True 5287 5288 func = self._parse_function_call( 5289 functions=functions, 5290 anonymous=anonymous, 5291 optional_parens=optional_parens, 5292 any_token=any_token, 5293 ) 5294 5295 if fn_syntax: 5296 self._match(TokenType.R_BRACE) 5297 5298 return func 5299 5300 def _parse_function_call( 5301 self, 5302 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5303 anonymous: bool = False, 5304 optional_parens: bool = True, 5305 any_token: bool = False, 5306 ) -> t.Optional[exp.Expression]: 5307 if not self._curr: 5308 return None 5309 5310 comments = self._curr.comments 5311 token_type = self._curr.token_type 5312 this = self._curr.text 5313 upper = this.upper() 5314 5315 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5316 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5317 self._advance() 5318 return self._parse_window(parser(self)) 5319 5320 if not self._next or self._next.token_type != TokenType.L_PAREN: 5321 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5322 self._advance() 5323 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5324 5325 return None 5326 5327 if any_token: 5328 if token_type in self.RESERVED_TOKENS: 5329 return None 5330 elif token_type not in self.FUNC_TOKENS: 5331 return None 5332 5333 self._advance(2) 5334 5335 parser = self.FUNCTION_PARSERS.get(upper) 5336 if parser and not anonymous: 5337 this = parser(self) 
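# Illustrative note (not part of the original source): FUNCTION_PARSERS maps
# names whose call syntax deviates from a plain argument list -- e.g. EXTRACT,
# TRIM, SUBSTRING and JSON_OBJECT, whose _parse_* helpers appear later in this
# class -- while the else-branch below handles ordinary comma-separated arguments.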
5338 else: 5339 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5340 5341 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5342 this = self.expression( 5343 subquery_predicate, comments=comments, this=self._parse_select() 5344 ) 5345 self._match_r_paren() 5346 return this 5347 5348 if functions is None: 5349 functions = self.FUNCTIONS 5350 5351 function = functions.get(upper) 5352 5353 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5354 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5355 5356 if alias: 5357 args = self._kv_to_prop_eq(args) 5358 5359 if function and not anonymous: 5360 if "dialect" in function.__code__.co_varnames: 5361 func = function(args, dialect=self.dialect) 5362 else: 5363 func = function(args) 5364 5365 func = self.validate_expression(func, args) 5366 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5367 func.meta["name"] = this 5368 5369 this = func 5370 else: 5371 if token_type == TokenType.IDENTIFIER: 5372 this = exp.Identifier(this=this, quoted=True) 5373 this = self.expression(exp.Anonymous, this=this, expressions=args) 5374 5375 if isinstance(this, exp.Expression): 5376 this.add_comments(comments) 5377 5378 self._match_r_paren(this) 5379 return self._parse_window(this) 5380 5381 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5382 return expression 5383 5384 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5385 transformed = [] 5386 5387 for index, e in enumerate(expressions): 5388 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5389 if isinstance(e, exp.Alias): 5390 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5391 5392 if not isinstance(e, exp.PropertyEQ): 5393 e = self.expression( 5394 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5395 ) 5396 5397 if isinstance(e.this, exp.Column): 5398 e.this.replace(e.this.this) 5399 else: 5400 e = self._to_prop_eq(e, index) 5401 5402 transformed.append(e) 5403 5404 return transformed 5405 5406 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5407 return self._parse_statement() 5408 5409 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5410 return self._parse_column_def(self._parse_id_var()) 5411 5412 def _parse_user_defined_function( 5413 self, kind: t.Optional[TokenType] = None 5414 ) -> t.Optional[exp.Expression]: 5415 this = self._parse_id_var() 5416 5417 while self._match(TokenType.DOT): 5418 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 5419 5420 if not self._match(TokenType.L_PAREN): 5421 return this 5422 5423 expressions = self._parse_csv(self._parse_function_parameter) 5424 self._match_r_paren() 5425 return self.expression( 5426 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5427 ) 5428 5429 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5430 literal = self._parse_primary() 5431 if literal: 5432 return self.expression(exp.Introducer, this=token.text, expression=literal) 5433 5434 return self.expression(exp.Identifier, this=token.text) 5435 5436 def _parse_session_parameter(self) -> exp.SessionParameter: 5437 kind = None 5438 this = self._parse_id_var() or self._parse_primary() 5439 5440 if this and self._match(TokenType.DOT): 5441 kind = this.name 5442 this = self._parse_var() or self._parse_primary() 5443 5444 return self.expression(exp.SessionParameter, this=this, kind=kind) 
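# Illustrative example (not part of the original source), assuming the public
# sqlglot API, showing the two outcomes of _parse_function_call above -- a name
# found in FUNCTIONS builds a typed node, while an unknown one becomes exp.Anonymous:
#
#     >>> import sqlglot
#     >>> type(sqlglot.parse_one("SELECT COALESCE(a, b)").selects[0]).__name__
#     'Coalesce'
#     >>> type(sqlglot.parse_one("SELECT MY_UDF(a, b)").selects[0]).__name__
#     'Anonymous'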
5445 5446 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5447 return self._parse_id_var() 5448 5449 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5450 index = self._index 5451 5452 if self._match(TokenType.L_PAREN): 5453 expressions = t.cast( 5454 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5455 ) 5456 5457 if not self._match(TokenType.R_PAREN): 5458 self._retreat(index) 5459 else: 5460 expressions = [self._parse_lambda_arg()] 5461 5462 if self._match_set(self.LAMBDAS): 5463 return self.LAMBDAS[self._prev.token_type](self, expressions) 5464 5465 self._retreat(index) 5466 5467 this: t.Optional[exp.Expression] 5468 5469 if self._match(TokenType.DISTINCT): 5470 this = self.expression( 5471 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5472 ) 5473 else: 5474 this = self._parse_select_or_expression(alias=alias) 5475 5476 return self._parse_limit( 5477 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5478 ) 5479 5480 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5481 index = self._index 5482 if not self._match(TokenType.L_PAREN): 5483 return this 5484 5485 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 5486 # expr can be of both types 5487 if self._match_set(self.SELECT_START_TOKENS): 5488 self._retreat(index) 5489 return this 5490 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5491 self._match_r_paren() 5492 return self.expression(exp.Schema, this=this, expressions=args) 5493 5494 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5495 return self._parse_column_def(self._parse_field(any_token=True)) 5496 5497 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5498 # column defs are not really columns, they're identifiers 5499 if isinstance(this, exp.Column): 5500 this = this.this 5501 5502 kind = self._parse_types(schema=True) 5503 5504 if self._match_text_seq("FOR", "ORDINALITY"): 5505 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5506 5507 constraints: t.List[exp.Expression] = [] 5508 5509 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5510 ("ALIAS", "MATERIALIZED") 5511 ): 5512 persisted = self._prev.text.upper() == "MATERIALIZED" 5513 constraint_kind = exp.ComputedColumnConstraint( 5514 this=self._parse_assignment(), 5515 persisted=persisted or self._match_text_seq("PERSISTED"), 5516 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5517 ) 5518 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5519 elif ( 5520 kind 5521 and self._match(TokenType.ALIAS, advance=False) 5522 and ( 5523 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 5524 or (self._next and self._next.token_type == TokenType.L_PAREN) 5525 ) 5526 ): 5527 self._advance() 5528 constraints.append( 5529 self.expression( 5530 exp.ColumnConstraint, 5531 kind=exp.TransformColumnConstraint(this=self._parse_disjunction()), 5532 ) 5533 ) 5534 5535 while True: 5536 constraint = self._parse_column_constraint() 5537 if not constraint: 5538 break 5539 constraints.append(constraint) 5540 5541 if not kind and not constraints: 5542 return this 5543 5544 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5545 5546 def _parse_auto_increment( 5547 self, 5548 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 
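# Illustrative note (not part of the original source): a wrapped spec such as
# T-SQL's IDENTITY(1, 1) takes the L_PAREN branch below and normalizes to
# exp.GeneratedAsIdentityColumnConstraint(start=1, increment=1), whereas a bare
# AUTO_INCREMENT falls through to exp.AutoIncrementColumnConstraint.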
5549 start = None 5550 increment = None 5551 5552 if self._match(TokenType.L_PAREN, advance=False): 5553 args = self._parse_wrapped_csv(self._parse_bitwise) 5554 start = seq_get(args, 0) 5555 increment = seq_get(args, 1) 5556 elif self._match_text_seq("START"): 5557 start = self._parse_bitwise() 5558 self._match_text_seq("INCREMENT") 5559 increment = self._parse_bitwise() 5560 5561 if start and increment: 5562 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 5563 5564 return exp.AutoIncrementColumnConstraint() 5565 5566 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5567 if not self._match_text_seq("REFRESH"): 5568 self._retreat(self._index - 1) 5569 return None 5570 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5571 5572 def _parse_compress(self) -> exp.CompressColumnConstraint: 5573 if self._match(TokenType.L_PAREN, advance=False): 5574 return self.expression( 5575 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5576 ) 5577 5578 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5579 5580 def _parse_generated_as_identity( 5581 self, 5582 ) -> ( 5583 exp.GeneratedAsIdentityColumnConstraint 5584 | exp.ComputedColumnConstraint 5585 | exp.GeneratedAsRowColumnConstraint 5586 ): 5587 if self._match_text_seq("BY", "DEFAULT"): 5588 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5589 this = self.expression( 5590 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5591 ) 5592 else: 5593 self._match_text_seq("ALWAYS") 5594 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5595 5596 self._match(TokenType.ALIAS) 5597 5598 if self._match_text_seq("ROW"): 5599 start = self._match_text_seq("START") 5600 if not start: 5601 self._match(TokenType.END) 5602 hidden = self._match_text_seq("HIDDEN") 5603 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5604 5605 identity = self._match_text_seq("IDENTITY") 5606 5607 if self._match(TokenType.L_PAREN): 5608 if self._match(TokenType.START_WITH): 5609 this.set("start", self._parse_bitwise()) 5610 if self._match_text_seq("INCREMENT", "BY"): 5611 this.set("increment", self._parse_bitwise()) 5612 if self._match_text_seq("MINVALUE"): 5613 this.set("minvalue", self._parse_bitwise()) 5614 if self._match_text_seq("MAXVALUE"): 5615 this.set("maxvalue", self._parse_bitwise()) 5616 5617 if self._match_text_seq("CYCLE"): 5618 this.set("cycle", True) 5619 elif self._match_text_seq("NO", "CYCLE"): 5620 this.set("cycle", False) 5621 5622 if not identity: 5623 this.set("expression", self._parse_range()) 5624 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5625 args = self._parse_csv(self._parse_bitwise) 5626 this.set("start", seq_get(args, 0)) 5627 this.set("increment", seq_get(args, 1)) 5628 5629 self._match_r_paren() 5630 5631 return this 5632 5633 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5634 self._match_text_seq("LENGTH") 5635 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5636 5637 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5638 if self._match_text_seq("NULL"): 5639 return self.expression(exp.NotNullColumnConstraint) 5640 if self._match_text_seq("CASESPECIFIC"): 5641 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5642 if self._match_text_seq("FOR", "REPLICATION"): 5643 return 
self.expression(exp.NotForReplicationColumnConstraint) 5644 5645 # Unconsume the `NOT` token 5646 self._retreat(self._index - 1) 5647 return None 5648 5649 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5650 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 5651 5652 procedure_option_follows = ( 5653 self._match(TokenType.WITH, advance=False) 5654 and self._next 5655 and self._next.text.upper() in self.PROCEDURE_OPTIONS 5656 ) 5657 5658 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 5659 return self.expression( 5660 exp.ColumnConstraint, 5661 this=this, 5662 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5663 ) 5664 5665 return this 5666 5667 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5668 if not self._match(TokenType.CONSTRAINT): 5669 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5670 5671 return self.expression( 5672 exp.Constraint, 5673 this=self._parse_id_var(), 5674 expressions=self._parse_unnamed_constraints(), 5675 ) 5676 5677 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5678 constraints = [] 5679 while True: 5680 constraint = self._parse_unnamed_constraint() or self._parse_function() 5681 if not constraint: 5682 break 5683 constraints.append(constraint) 5684 5685 return constraints 5686 5687 def _parse_unnamed_constraint( 5688 self, constraints: t.Optional[t.Collection[str]] = None 5689 ) -> t.Optional[exp.Expression]: 5690 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5691 constraints or self.CONSTRAINT_PARSERS 5692 ): 5693 return None 5694 5695 constraint = self._prev.text.upper() 5696 if constraint not in self.CONSTRAINT_PARSERS: 5697 self.raise_error(f"No parser found for schema constraint {constraint}.") 5698 5699 return self.CONSTRAINT_PARSERS[constraint](self) 5700 5701 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5702 return self._parse_id_var(any_token=False) 5703 5704 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5705 self._match_text_seq("KEY") 5706 return self.expression( 5707 exp.UniqueColumnConstraint, 5708 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5709 this=self._parse_schema(self._parse_unique_key()), 5710 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5711 on_conflict=self._parse_on_conflict(), 5712 ) 5713 5714 def _parse_key_constraint_options(self) -> t.List[str]: 5715 options = [] 5716 while True: 5717 if not self._curr: 5718 break 5719 5720 if self._match(TokenType.ON): 5721 action = None 5722 on = self._advance_any() and self._prev.text 5723 5724 if self._match_text_seq("NO", "ACTION"): 5725 action = "NO ACTION" 5726 elif self._match_text_seq("CASCADE"): 5727 action = "CASCADE" 5728 elif self._match_text_seq("RESTRICT"): 5729 action = "RESTRICT" 5730 elif self._match_pair(TokenType.SET, TokenType.NULL): 5731 action = "SET NULL" 5732 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5733 action = "SET DEFAULT" 5734 else: 5735 self.raise_error("Invalid key constraint") 5736 5737 options.append(f"ON {on} {action}") 5738 else: 5739 var = self._parse_var_from_options( 5740 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5741 ) 5742 if not var: 5743 break 5744 options.append(var.name) 5745 5746 return options 5747 5748 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5749 if match and not self._match(TokenType.REFERENCES): 5750 return None 5751 5752 expressions = None 
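# Illustrative note (not part of the original source): parsing the target with
# schema=True lets REFERENCES t(a, b) capture the referenced column list as an
# exp.Schema around the table, and _parse_key_constraint_options then collects
# trailing options such as ON DELETE CASCADE.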
5753 this = self._parse_table(schema=True) 5754 options = self._parse_key_constraint_options() 5755 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5756 5757 def _parse_foreign_key(self) -> exp.ForeignKey: 5758 expressions = self._parse_wrapped_id_vars() 5759 reference = self._parse_references() 5760 options = {} 5761 5762 while self._match(TokenType.ON): 5763 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5764 self.raise_error("Expected DELETE or UPDATE") 5765 5766 kind = self._prev.text.lower() 5767 5768 if self._match_text_seq("NO", "ACTION"): 5769 action = "NO ACTION" 5770 elif self._match(TokenType.SET): 5771 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5772 action = "SET " + self._prev.text.upper() 5773 else: 5774 self._advance() 5775 action = self._prev.text.upper() 5776 5777 options[kind] = action 5778 5779 return self.expression( 5780 exp.ForeignKey, 5781 expressions=expressions, 5782 reference=reference, 5783 **options, # type: ignore 5784 ) 5785 5786 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5787 return self._parse_field() 5788 5789 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5790 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5791 self._retreat(self._index - 1) 5792 return None 5793 5794 id_vars = self._parse_wrapped_id_vars() 5795 return self.expression( 5796 exp.PeriodForSystemTimeConstraint, 5797 this=seq_get(id_vars, 0), 5798 expression=seq_get(id_vars, 1), 5799 ) 5800 5801 def _parse_primary_key( 5802 self, wrapped_optional: bool = False, in_props: bool = False 5803 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5804 desc = ( 5805 self._match_set((TokenType.ASC, TokenType.DESC)) 5806 and self._prev.token_type == TokenType.DESC 5807 ) 5808 5809 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5810 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5811 5812 expressions = self._parse_wrapped_csv( 5813 self._parse_primary_key_part, optional=wrapped_optional 5814 ) 5815 options = self._parse_key_constraint_options() 5816 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5817 5818 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5819 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5820 5821 def _parse_odbc_datetime_literal(self) -> exp.Expression: 5822 """ 5823 Parses a datetime column in ODBC format. We parse the column into the corresponding 5824 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 5825 same as we did for `DATE('yyyy-mm-dd')`. 
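        The other ODBC introducers behave analogously (illustrative note, not part
        of the original docstring): per the ODBC_DATETIME_LITERALS mapping used
        below, `{t'hh:mm:ss'}` maps to a `Time` and `{ts'yyyy-mm-dd hh:mm:ss'}`
        to a `Timestamp`.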
5826 5827 Reference: 5828 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 5829 """ 5830 self._match(TokenType.VAR) 5831 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 5832 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 5833 if not self._match(TokenType.R_BRACE): 5834 self.raise_error("Expected }") 5835 return expression 5836 5837 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5838 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5839 return this 5840 5841 bracket_kind = self._prev.token_type 5842 if ( 5843 bracket_kind == TokenType.L_BRACE 5844 and self._curr 5845 and self._curr.token_type == TokenType.VAR 5846 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 5847 ): 5848 return self._parse_odbc_datetime_literal() 5849 5850 expressions = self._parse_csv( 5851 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5852 ) 5853 5854 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5855 self.raise_error("Expected ]") 5856 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5857 self.raise_error("Expected }") 5858 5859 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5860 if bracket_kind == TokenType.L_BRACE: 5861 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5862 elif not this: 5863 this = build_array_constructor( 5864 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 5865 ) 5866 else: 5867 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5868 if constructor_type: 5869 return build_array_constructor( 5870 constructor_type, 5871 args=expressions, 5872 bracket_kind=bracket_kind, 5873 dialect=self.dialect, 5874 ) 5875 5876 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5877 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5878 5879 self._add_comments(this) 5880 return self._parse_bracket(this) 5881 5882 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5883 if self._match(TokenType.COLON): 5884 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5885 return this 5886 5887 def _parse_case(self) -> t.Optional[exp.Expression]: 5888 ifs = [] 5889 default = None 5890 5891 comments = self._prev_comments 5892 expression = self._parse_assignment() 5893 5894 while self._match(TokenType.WHEN): 5895 this = self._parse_assignment() 5896 self._match(TokenType.THEN) 5897 then = self._parse_assignment() 5898 ifs.append(self.expression(exp.If, this=this, true=then)) 5899 5900 if self._match(TokenType.ELSE): 5901 default = self._parse_assignment() 5902 5903 if not self._match(TokenType.END): 5904 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5905 default = exp.column("interval") 5906 else: 5907 self.raise_error("Expected END after CASE", self._prev) 5908 5909 return self.expression( 5910 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5911 ) 5912 5913 def _parse_if(self) -> t.Optional[exp.Expression]: 5914 if self._match(TokenType.L_PAREN): 5915 args = self._parse_csv(self._parse_assignment) 5916 this = self.validate_expression(exp.If.from_arg_list(args), args) 5917 self._match_r_paren() 5918 else: 5919 index = self._index - 1 5920 5921 if self.NO_PAREN_IF_COMMANDS and index == 0: 5922 
return self._parse_as_command(self._prev) 5923 5924 condition = self._parse_assignment() 5925 5926 if not condition: 5927 self._retreat(index) 5928 return None 5929 5930 self._match(TokenType.THEN) 5931 true = self._parse_assignment() 5932 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5933 self._match(TokenType.END) 5934 this = self.expression(exp.If, this=condition, true=true, false=false) 5935 5936 return this 5937 5938 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5939 if not self._match_text_seq("VALUE", "FOR"): 5940 self._retreat(self._index - 1) 5941 return None 5942 5943 return self.expression( 5944 exp.NextValueFor, 5945 this=self._parse_column(), 5946 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5947 ) 5948 5949 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 5950 this = self._parse_function() or self._parse_var_or_string(upper=True) 5951 5952 if self._match(TokenType.FROM): 5953 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5954 5955 if not self._match(TokenType.COMMA): 5956 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5957 5958 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5959 5960 def _parse_gap_fill(self) -> exp.GapFill: 5961 self._match(TokenType.TABLE) 5962 this = self._parse_table() 5963 5964 self._match(TokenType.COMMA) 5965 args = [this, *self._parse_csv(self._parse_lambda)] 5966 5967 gap_fill = exp.GapFill.from_arg_list(args) 5968 return self.validate_expression(gap_fill, args) 5969 5970 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5971 this = self._parse_assignment() 5972 5973 if not self._match(TokenType.ALIAS): 5974 if self._match(TokenType.COMMA): 5975 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5976 5977 self.raise_error("Expected AS after CAST") 5978 5979 fmt = None 5980 to = self._parse_types() 5981 5982 if self._match(TokenType.FORMAT): 5983 fmt_string = self._parse_string() 5984 fmt = self._parse_at_time_zone(fmt_string) 5985 5986 if not to: 5987 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5988 if to.this in exp.DataType.TEMPORAL_TYPES: 5989 this = self.expression( 5990 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5991 this=this, 5992 format=exp.Literal.string( 5993 format_time( 5994 fmt_string.this if fmt_string else "", 5995 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5996 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5997 ) 5998 ), 5999 safe=safe, 6000 ) 6001 6002 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6003 this.set("zone", fmt.args["zone"]) 6004 return this 6005 elif not to: 6006 self.raise_error("Expected TYPE after CAST") 6007 elif isinstance(to, exp.Identifier): 6008 to = exp.DataType.build(to.name, udt=True) 6009 elif to.this == exp.DataType.Type.CHAR: 6010 if self._match(TokenType.CHARACTER_SET): 6011 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6012 6013 return self.expression( 6014 exp.Cast if strict else exp.TryCast, 6015 this=this, 6016 to=to, 6017 format=fmt, 6018 safe=safe, 6019 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6020 ) 6021 6022 def _parse_string_agg(self) -> exp.GroupConcat: 6023 if self._match(TokenType.DISTINCT): 6024 args: t.List[t.Optional[exp.Expression]] = [ 6025 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 
6026 ] 6027 if self._match(TokenType.COMMA): 6028 args.extend(self._parse_csv(self._parse_assignment)) 6029 else: 6030 args = self._parse_csv(self._parse_assignment) # type: ignore 6031 6032 if self._match_text_seq("ON", "OVERFLOW"): 6033 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6034 if self._match_text_seq("ERROR"): 6035 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6036 else: 6037 self._match_text_seq("TRUNCATE") 6038 on_overflow = self.expression( 6039 exp.OverflowTruncateBehavior, 6040 this=self._parse_string(), 6041 with_count=( 6042 self._match_text_seq("WITH", "COUNT") 6043 or not self._match_text_seq("WITHOUT", "COUNT") 6044 ), 6045 ) 6046 else: 6047 on_overflow = None 6048 6049 index = self._index 6050 if not self._match(TokenType.R_PAREN) and args: 6051 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6052 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6053 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 6054 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6055 6056 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6057 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6058 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 6059 if not self._match_text_seq("WITHIN", "GROUP"): 6060 self._retreat(index) 6061 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6062 6063 # The corresponding match_r_paren will be called in parse_function (caller) 6064 self._match_l_paren() 6065 6066 return self.expression( 6067 exp.GroupConcat, 6068 this=self._parse_order(this=seq_get(args, 0)), 6069 separator=seq_get(args, 1), 6070 on_overflow=on_overflow, 6071 ) 6072 6073 def _parse_convert( 6074 self, strict: bool, safe: t.Optional[bool] = None 6075 ) -> t.Optional[exp.Expression]: 6076 this = self._parse_bitwise() 6077 6078 if self._match(TokenType.USING): 6079 to: t.Optional[exp.Expression] = self.expression( 6080 exp.CharacterSet, this=self._parse_var() 6081 ) 6082 elif self._match(TokenType.COMMA): 6083 to = self._parse_types() 6084 else: 6085 to = None 6086 6087 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 6088 6089 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 6090 """ 6091 There are generally two variants of the DECODE function: 6092 6093 - DECODE(bin, charset) 6094 - DECODE(expression, search, result [, search, result] ... [, default]) 6095 6096 The second variant will always be parsed into a CASE expression. Note that NULL 6097 needs special treatment, since we need to explicitly check for it with `IS NULL`, 6098 instead of relying on pattern matching. 
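        For example (illustrative note, not part of the original docstring),
        DECODE(a, 1, 'one', 'other') is parsed as
        CASE WHEN a = 1 THEN 'one' ELSE 'other' END, while DECODE(a, NULL, 'missing')
        becomes CASE WHEN a IS NULL THEN 'missing' END.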
6099 """ 6100 args = self._parse_csv(self._parse_assignment) 6101 6102 if len(args) < 3: 6103 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6104 6105 expression, *expressions = args 6106 if not expression: 6107 return None 6108 6109 ifs = [] 6110 for search, result in zip(expressions[::2], expressions[1::2]): 6111 if not search or not result: 6112 return None 6113 6114 if isinstance(search, exp.Literal): 6115 ifs.append( 6116 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 6117 ) 6118 elif isinstance(search, exp.Null): 6119 ifs.append( 6120 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 6121 ) 6122 else: 6123 cond = exp.or_( 6124 exp.EQ(this=expression.copy(), expression=search), 6125 exp.and_( 6126 exp.Is(this=expression.copy(), expression=exp.Null()), 6127 exp.Is(this=search.copy(), expression=exp.Null()), 6128 copy=False, 6129 ), 6130 copy=False, 6131 ) 6132 ifs.append(exp.If(this=cond, true=result)) 6133 6134 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6135 6136 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6137 self._match_text_seq("KEY") 6138 key = self._parse_column() 6139 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6140 self._match_text_seq("VALUE") 6141 value = self._parse_bitwise() 6142 6143 if not key and not value: 6144 return None 6145 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6146 6147 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6148 if not this or not self._match_text_seq("FORMAT", "JSON"): 6149 return this 6150 6151 return self.expression(exp.FormatJson, this=this) 6152 6153 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6154 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6155 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6156 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6157 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6158 else: 6159 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6160 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6161 6162 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6163 6164 if not empty and not error and not null: 6165 return None 6166 6167 return self.expression( 6168 exp.OnCondition, 6169 empty=empty, 6170 error=error, 6171 null=null, 6172 ) 6173 6174 def _parse_on_handling( 6175 self, on: str, *values: str 6176 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6177 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6178 for value in values: 6179 if self._match_text_seq(value, "ON", on): 6180 return f"{value} ON {on}" 6181 6182 index = self._index 6183 if self._match(TokenType.DEFAULT): 6184 default_value = self._parse_bitwise() 6185 if self._match_text_seq("ON", on): 6186 return default_value 6187 6188 self._retreat(index) 6189 6190 return None 6191 6192 @t.overload 6193 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6194 6195 @t.overload 6196 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
6197 6198 def _parse_json_object(self, agg=False): 6199 star = self._parse_star() 6200 expressions = ( 6201 [star] 6202 if star 6203 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6204 ) 6205 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6206 6207 unique_keys = None 6208 if self._match_text_seq("WITH", "UNIQUE"): 6209 unique_keys = True 6210 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6211 unique_keys = False 6212 6213 self._match_text_seq("KEYS") 6214 6215 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6216 self._parse_type() 6217 ) 6218 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6219 6220 return self.expression( 6221 exp.JSONObjectAgg if agg else exp.JSONObject, 6222 expressions=expressions, 6223 null_handling=null_handling, 6224 unique_keys=unique_keys, 6225 return_type=return_type, 6226 encoding=encoding, 6227 ) 6228 6229 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6230 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6231 if not self._match_text_seq("NESTED"): 6232 this = self._parse_id_var() 6233 kind = self._parse_types(allow_identifiers=False) 6234 nested = None 6235 else: 6236 this = None 6237 kind = None 6238 nested = True 6239 6240 path = self._match_text_seq("PATH") and self._parse_string() 6241 nested_schema = nested and self._parse_json_schema() 6242 6243 return self.expression( 6244 exp.JSONColumnDef, 6245 this=this, 6246 kind=kind, 6247 path=path, 6248 nested_schema=nested_schema, 6249 ) 6250 6251 def _parse_json_schema(self) -> exp.JSONSchema: 6252 self._match_text_seq("COLUMNS") 6253 return self.expression( 6254 exp.JSONSchema, 6255 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6256 ) 6257 6258 def _parse_json_table(self) -> exp.JSONTable: 6259 this = self._parse_format_json(self._parse_bitwise()) 6260 path = self._match(TokenType.COMMA) and self._parse_string() 6261 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6262 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6263 schema = self._parse_json_schema() 6264 6265 return exp.JSONTable( 6266 this=this, 6267 schema=schema, 6268 path=path, 6269 error_handling=error_handling, 6270 empty_handling=empty_handling, 6271 ) 6272 6273 def _parse_match_against(self) -> exp.MatchAgainst: 6274 expressions = self._parse_csv(self._parse_column) 6275 6276 self._match_text_seq(")", "AGAINST", "(") 6277 6278 this = self._parse_string() 6279 6280 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6281 modifier = "IN NATURAL LANGUAGE MODE" 6282 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6283 modifier = f"{modifier} WITH QUERY EXPANSION" 6284 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6285 modifier = "IN BOOLEAN MODE" 6286 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6287 modifier = "WITH QUERY EXPANSION" 6288 else: 6289 modifier = None 6290 6291 return self.expression( 6292 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6293 ) 6294 6295 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6296 def _parse_open_json(self) -> exp.OpenJSON: 6297 this = self._parse_bitwise() 6298 path = self._match(TokenType.COMMA) and self._parse_string() 6299 6300 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6301 this = self._parse_field(any_token=True) 6302 kind = self._parse_types() 6303 path = 
self._parse_string() 6304 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6305 6306 return self.expression( 6307 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6308 ) 6309 6310 expressions = None 6311 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6312 self._match_l_paren() 6313 expressions = self._parse_csv(_parse_open_json_column_def) 6314 6315 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6316 6317 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6318 args = self._parse_csv(self._parse_bitwise) 6319 6320 if self._match(TokenType.IN): 6321 return self.expression( 6322 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6323 ) 6324 6325 if haystack_first: 6326 haystack = seq_get(args, 0) 6327 needle = seq_get(args, 1) 6328 else: 6329 needle = seq_get(args, 0) 6330 haystack = seq_get(args, 1) 6331 6332 return self.expression( 6333 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6334 ) 6335 6336 def _parse_predict(self) -> exp.Predict: 6337 self._match_text_seq("MODEL") 6338 this = self._parse_table() 6339 6340 self._match(TokenType.COMMA) 6341 self._match_text_seq("TABLE") 6342 6343 return self.expression( 6344 exp.Predict, 6345 this=this, 6346 expression=self._parse_table(), 6347 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6348 ) 6349 6350 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6351 args = self._parse_csv(self._parse_table) 6352 return exp.JoinHint(this=func_name.upper(), expressions=args) 6353 6354 def _parse_substring(self) -> exp.Substring: 6355 # Postgres supports the form: substring(string [from int] [for int]) 6356 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6357 6358 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6359 6360 if self._match(TokenType.FROM): 6361 args.append(self._parse_bitwise()) 6362 if self._match(TokenType.FOR): 6363 if len(args) == 1: 6364 args.append(exp.Literal.number(1)) 6365 args.append(self._parse_bitwise()) 6366 6367 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6368 6369 def _parse_trim(self) -> exp.Trim: 6370 # https://www.w3resource.com/sql/character-functions/trim.php 6371 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6372 6373 position = None 6374 collation = None 6375 expression = None 6376 6377 if self._match_texts(self.TRIM_TYPES): 6378 position = self._prev.text.upper() 6379 6380 this = self._parse_bitwise() 6381 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6382 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6383 expression = self._parse_bitwise() 6384 6385 if invert_order: 6386 this, expression = expression, this 6387 6388 if self._match(TokenType.COLLATE): 6389 collation = self._parse_bitwise() 6390 6391 return self.expression( 6392 exp.Trim, this=this, position=position, expression=expression, collation=collation 6393 ) 6394 6395 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6396 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6397 6398 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6399 return self._parse_window(self._parse_id_var(), alias=True) 6400 6401 def _parse_respect_or_ignore_nulls( 6402 self, this: t.Optional[exp.Expression] 6403 ) -> t.Optional[exp.Expression]: 6404 if self._match_text_seq("IGNORE", "NULLS"): 
6405 return self.expression(exp.IgnoreNulls, this=this) 6406 if self._match_text_seq("RESPECT", "NULLS"): 6407 return self.expression(exp.RespectNulls, this=this) 6408 return this 6409 6410 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6411 if self._match(TokenType.HAVING): 6412 self._match_texts(("MAX", "MIN")) 6413 max = self._prev.text.upper() != "MIN" 6414 return self.expression( 6415 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6416 ) 6417 6418 return this 6419 6420 def _parse_window( 6421 self, this: t.Optional[exp.Expression], alias: bool = False 6422 ) -> t.Optional[exp.Expression]: 6423 func = this 6424 comments = func.comments if isinstance(func, exp.Expression) else None 6425 6426 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6427 self._match(TokenType.WHERE) 6428 this = self.expression( 6429 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6430 ) 6431 self._match_r_paren() 6432 6433 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6434 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6435 if self._match_text_seq("WITHIN", "GROUP"): 6436 order = self._parse_wrapped(self._parse_order) 6437 this = self.expression(exp.WithinGroup, this=this, expression=order) 6438 6439 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6440 # Some dialects choose to implement and some do not. 6441 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6442 6443 # There is some code above in _parse_lambda that handles 6444 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6445 6446 # The below changes handle 6447 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6448 6449 # Oracle allows both formats 6450 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6451 # and Snowflake chose to do the same for familiarity 6452 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6453 if isinstance(this, exp.AggFunc): 6454 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6455 6456 if ignore_respect and ignore_respect is not this: 6457 ignore_respect.replace(ignore_respect.this) 6458 this = self.expression(ignore_respect.__class__, this=this) 6459 6460 this = self._parse_respect_or_ignore_nulls(this) 6461 6462 # bigquery select from window x AS (partition by ...) 
6463 if alias: 6464 over = None 6465 self._match(TokenType.ALIAS) 6466 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6467 return this 6468 else: 6469 over = self._prev.text.upper() 6470 6471 if comments and isinstance(func, exp.Expression): 6472 func.pop_comments() 6473 6474 if not self._match(TokenType.L_PAREN): 6475 return self.expression( 6476 exp.Window, 6477 comments=comments, 6478 this=this, 6479 alias=self._parse_id_var(False), 6480 over=over, 6481 ) 6482 6483 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6484 6485 first = self._match(TokenType.FIRST) 6486 if self._match_text_seq("LAST"): 6487 first = False 6488 6489 partition, order = self._parse_partition_and_order() 6490 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6491 6492 if kind: 6493 self._match(TokenType.BETWEEN) 6494 start = self._parse_window_spec() 6495 self._match(TokenType.AND) 6496 end = self._parse_window_spec() 6497 6498 spec = self.expression( 6499 exp.WindowSpec, 6500 kind=kind, 6501 start=start["value"], 6502 start_side=start["side"], 6503 end=end["value"], 6504 end_side=end["side"], 6505 ) 6506 else: 6507 spec = None 6508 6509 self._match_r_paren() 6510 6511 window = self.expression( 6512 exp.Window, 6513 comments=comments, 6514 this=this, 6515 partition_by=partition, 6516 order=order, 6517 spec=spec, 6518 alias=window_alias, 6519 over=over, 6520 first=first, 6521 ) 6522 6523 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6524 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6525 return self._parse_window(window, alias=alias) 6526 6527 return window 6528 6529 def _parse_partition_and_order( 6530 self, 6531 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6532 return self._parse_partition_by(), self._parse_order() 6533 6534 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6535 self._match(TokenType.BETWEEN) 6536 6537 return { 6538 "value": ( 6539 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6540 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6541 or self._parse_bitwise() 6542 ), 6543 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6544 } 6545 6546 def _parse_alias( 6547 self, this: t.Optional[exp.Expression], explicit: bool = False 6548 ) -> t.Optional[exp.Expression]: 6549 any_token = self._match(TokenType.ALIAS) 6550 comments = self._prev_comments or [] 6551 6552 if explicit and not any_token: 6553 return this 6554 6555 if self._match(TokenType.L_PAREN): 6556 aliases = self.expression( 6557 exp.Aliases, 6558 comments=comments, 6559 this=this, 6560 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6561 ) 6562 self._match_r_paren(aliases) 6563 return aliases 6564 6565 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6566 self.STRING_ALIASES and self._parse_string_as_identifier() 6567 ) 6568 6569 if alias: 6570 comments.extend(alias.pop_comments()) 6571 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6572 column = this.this 6573 6574 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6575 if not this.comments and column and column.comments: 6576 this.comments = column.pop_comments() 6577 6578 return this 6579 6580 def _parse_id_var( 6581 self, 6582 any_token: bool = True, 6583 tokens: t.Optional[t.Collection[TokenType]] = None, 6584 ) -> t.Optional[exp.Expression]: 6585 expression = self._parse_identifier() 6586 if 
not expression and ( 6587 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6588 ): 6589 quoted = self._prev.token_type == TokenType.STRING 6590 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6591 6592 return expression 6593 6594 def _parse_string(self) -> t.Optional[exp.Expression]: 6595 if self._match_set(self.STRING_PARSERS): 6596 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6597 return self._parse_placeholder() 6598 6599 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6600 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6601 6602 def _parse_number(self) -> t.Optional[exp.Expression]: 6603 if self._match_set(self.NUMERIC_PARSERS): 6604 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6605 return self._parse_placeholder() 6606 6607 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6608 if self._match(TokenType.IDENTIFIER): 6609 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6610 return self._parse_placeholder() 6611 6612 def _parse_var( 6613 self, 6614 any_token: bool = False, 6615 tokens: t.Optional[t.Collection[TokenType]] = None, 6616 upper: bool = False, 6617 ) -> t.Optional[exp.Expression]: 6618 if ( 6619 (any_token and self._advance_any()) 6620 or self._match(TokenType.VAR) 6621 or (self._match_set(tokens) if tokens else False) 6622 ): 6623 return self.expression( 6624 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6625 ) 6626 return self._parse_placeholder() 6627 6628 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6629 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6630 self._advance() 6631 return self._prev 6632 return None 6633 6634 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6635 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6636 6637 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6638 return self._parse_primary() or self._parse_var(any_token=True) 6639 6640 def _parse_null(self) -> t.Optional[exp.Expression]: 6641 if self._match_set(self.NULL_TOKENS): 6642 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6643 return self._parse_placeholder() 6644 6645 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6646 if self._match(TokenType.TRUE): 6647 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6648 if self._match(TokenType.FALSE): 6649 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6650 return self._parse_placeholder() 6651 6652 def _parse_star(self) -> t.Optional[exp.Expression]: 6653 if self._match(TokenType.STAR): 6654 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6655 return self._parse_placeholder() 6656 6657 def _parse_parameter(self) -> exp.Parameter: 6658 this = self._parse_identifier() or self._parse_primary_or_var() 6659 return self.expression(exp.Parameter, this=this) 6660 6661 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6662 if self._match_set(self.PLACEHOLDER_PARSERS): 6663 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6664 if placeholder: 6665 return placeholder 6666 self._advance(-1) 6667 return None 6668 6669 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6670 if not self._match_texts(keywords): 6671 return None 6672 if self._match(TokenType.L_PAREN, 
advance=False): 6673 return self._parse_wrapped_csv(self._parse_expression) 6674 6675 expression = self._parse_expression() 6676 return [expression] if expression else None 6677 6678 def _parse_csv( 6679 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6680 ) -> t.List[exp.Expression]: 6681 parse_result = parse_method() 6682 items = [parse_result] if parse_result is not None else [] 6683 6684 while self._match(sep): 6685 self._add_comments(parse_result) 6686 parse_result = parse_method() 6687 if parse_result is not None: 6688 items.append(parse_result) 6689 6690 return items 6691 6692 def _parse_tokens( 6693 self, parse_method: t.Callable, expressions: t.Dict 6694 ) -> t.Optional[exp.Expression]: 6695 this = parse_method() 6696 6697 while self._match_set(expressions): 6698 this = self.expression( 6699 expressions[self._prev.token_type], 6700 this=this, 6701 comments=self._prev_comments, 6702 expression=parse_method(), 6703 ) 6704 6705 return this 6706 6707 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6708 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6709 6710 def _parse_wrapped_csv( 6711 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6712 ) -> t.List[exp.Expression]: 6713 return self._parse_wrapped( 6714 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6715 ) 6716 6717 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6718 wrapped = self._match(TokenType.L_PAREN) 6719 if not wrapped and not optional: 6720 self.raise_error("Expecting (") 6721 parse_result = parse_method() 6722 if wrapped: 6723 self._match_r_paren() 6724 return parse_result 6725 6726 def _parse_expressions(self) -> t.List[exp.Expression]: 6727 return self._parse_csv(self._parse_expression) 6728 6729 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6730 return self._parse_select() or self._parse_set_operations( 6731 self._parse_expression() if alias else self._parse_assignment() 6732 ) 6733 6734 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6735 return self._parse_query_modifiers( 6736 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6737 ) 6738 6739 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6740 this = None 6741 if self._match_texts(self.TRANSACTION_KIND): 6742 this = self._prev.text 6743 6744 self._match_texts(("TRANSACTION", "WORK")) 6745 6746 modes = [] 6747 while True: 6748 mode = [] 6749 while self._match(TokenType.VAR): 6750 mode.append(self._prev.text) 6751 6752 if mode: 6753 modes.append(" ".join(mode)) 6754 if not self._match(TokenType.COMMA): 6755 break 6756 6757 return self.expression(exp.Transaction, this=this, modes=modes) 6758 6759 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6760 chain = None 6761 savepoint = None 6762 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6763 6764 self._match_texts(("TRANSACTION", "WORK")) 6765 6766 if self._match_text_seq("TO"): 6767 self._match_text_seq("SAVEPOINT") 6768 savepoint = self._parse_id_var() 6769 6770 if self._match(TokenType.AND): 6771 chain = not self._match_text_seq("NO") 6772 self._match_text_seq("CHAIN") 6773 6774 if is_rollback: 6775 return self.expression(exp.Rollback, savepoint=savepoint) 6776 6777 return self.expression(exp.Commit, chain=chain) 6778 6779 def _parse_refresh(self) -> exp.Refresh: 6780 self._match(TokenType.TABLE) 6781 return 
self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 6782 6783 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6784 if not self._match_text_seq("ADD"): 6785 return None 6786 6787 self._match(TokenType.COLUMN) 6788 exists_column = self._parse_exists(not_=True) 6789 expression = self._parse_field_def() 6790 6791 if expression: 6792 expression.set("exists", exists_column) 6793 6794 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6795 if self._match_texts(("FIRST", "AFTER")): 6796 position = self._prev.text 6797 column_position = self.expression( 6798 exp.ColumnPosition, this=self._parse_column(), position=position 6799 ) 6800 expression.set("position", column_position) 6801 6802 return expression 6803 6804 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6805 drop = self._match(TokenType.DROP) and self._parse_drop() 6806 if drop and not isinstance(drop, exp.Command): 6807 drop.set("kind", drop.args.get("kind", "COLUMN")) 6808 return drop 6809 6810 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6811 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6812 return self.expression( 6813 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6814 ) 6815 6816 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6817 index = self._index - 1 6818 6819 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6820 return self._parse_csv( 6821 lambda: self.expression( 6822 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6823 ) 6824 ) 6825 6826 self._retreat(index) 6827 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6828 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6829 6830 if self._match_text_seq("ADD", "COLUMNS"): 6831 schema = self._parse_schema() 6832 if schema: 6833 return [schema] 6834 return [] 6835 6836 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6837 6838 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6839 if self._match_texts(self.ALTER_ALTER_PARSERS): 6840 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6841 6842 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6843 # keyword after ALTER we default to parsing this statement 6844 self._match(TokenType.COLUMN) 6845 column = self._parse_field(any_token=True) 6846 6847 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6848 return self.expression(exp.AlterColumn, this=column, drop=True) 6849 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6850 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6851 if self._match(TokenType.COMMENT): 6852 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6853 if self._match_text_seq("DROP", "NOT", "NULL"): 6854 return self.expression( 6855 exp.AlterColumn, 6856 this=column, 6857 drop=True, 6858 allow_null=True, 6859 ) 6860 if self._match_text_seq("SET", "NOT", "NULL"): 6861 return self.expression( 6862 exp.AlterColumn, 6863 this=column, 6864 allow_null=False, 6865 ) 6866 self._match_text_seq("SET", "DATA") 6867 self._match_text_seq("TYPE") 6868 return self.expression( 6869 exp.AlterColumn, 6870 this=column, 6871 dtype=self._parse_types(), 6872 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6873 using=self._match(TokenType.USING) and 
self._parse_assignment(), 6874 ) 6875 6876 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6877 if self._match_texts(("ALL", "EVEN", "AUTO")): 6878 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6879 6880 self._match_text_seq("KEY", "DISTKEY") 6881 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6882 6883 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6884 if compound: 6885 self._match_text_seq("SORTKEY") 6886 6887 if self._match(TokenType.L_PAREN, advance=False): 6888 return self.expression( 6889 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6890 ) 6891 6892 self._match_texts(("AUTO", "NONE")) 6893 return self.expression( 6894 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6895 ) 6896 6897 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6898 index = self._index - 1 6899 6900 partition_exists = self._parse_exists() 6901 if self._match(TokenType.PARTITION, advance=False): 6902 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6903 6904 self._retreat(index) 6905 return self._parse_csv(self._parse_drop_column) 6906 6907 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 6908 if self._match(TokenType.COLUMN): 6909 exists = self._parse_exists() 6910 old_column = self._parse_column() 6911 to = self._match_text_seq("TO") 6912 new_column = self._parse_column() 6913 6914 if old_column is None or to is None or new_column is None: 6915 return None 6916 6917 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6918 6919 self._match_text_seq("TO") 6920 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 6921 6922 def _parse_alter_table_set(self) -> exp.AlterSet: 6923 alter_set = self.expression(exp.AlterSet) 6924 6925 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6926 "TABLE", "PROPERTIES" 6927 ): 6928 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 6929 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6930 alter_set.set("expressions", [self._parse_assignment()]) 6931 elif self._match_texts(("LOGGED", "UNLOGGED")): 6932 alter_set.set("option", exp.var(self._prev.text.upper())) 6933 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6934 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6935 elif self._match_text_seq("LOCATION"): 6936 alter_set.set("location", self._parse_field()) 6937 elif self._match_text_seq("ACCESS", "METHOD"): 6938 alter_set.set("access_method", self._parse_field()) 6939 elif self._match_text_seq("TABLESPACE"): 6940 alter_set.set("tablespace", self._parse_field()) 6941 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6942 alter_set.set("file_format", [self._parse_field()]) 6943 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6944 alter_set.set("file_format", self._parse_wrapped_options()) 6945 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6946 alter_set.set("copy_options", self._parse_wrapped_options()) 6947 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6948 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6949 else: 6950 if self._match_text_seq("SERDE"): 6951 alter_set.set("serde", self._parse_field()) 6952 6953 alter_set.set("expressions", [self._parse_properties()]) 6954 6955 return 
alter_set 6956 6957 def _parse_alter(self) -> exp.Alter | exp.Command: 6958 start = self._prev 6959 6960 alter_token = self._match_set(self.ALTERABLES) and self._prev 6961 if not alter_token: 6962 return self._parse_as_command(start) 6963 6964 exists = self._parse_exists() 6965 only = self._match_text_seq("ONLY") 6966 this = self._parse_table(schema=True) 6967 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6968 6969 if self._next: 6970 self._advance() 6971 6972 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6973 if parser: 6974 actions = ensure_list(parser(self)) 6975 not_valid = self._match_text_seq("NOT", "VALID") 6976 options = self._parse_csv(self._parse_property) 6977 6978 if not self._curr and actions: 6979 return self.expression( 6980 exp.Alter, 6981 this=this, 6982 kind=alter_token.text.upper(), 6983 exists=exists, 6984 actions=actions, 6985 only=only, 6986 options=options, 6987 cluster=cluster, 6988 not_valid=not_valid, 6989 ) 6990 6991 return self._parse_as_command(start) 6992 6993 def _parse_merge(self) -> exp.Merge: 6994 self._match(TokenType.INTO) 6995 target = self._parse_table() 6996 6997 if target and self._match(TokenType.ALIAS, advance=False): 6998 target.set("alias", self._parse_table_alias()) 6999 7000 self._match(TokenType.USING) 7001 using = self._parse_table() 7002 7003 self._match(TokenType.ON) 7004 on = self._parse_assignment() 7005 7006 return self.expression( 7007 exp.Merge, 7008 this=target, 7009 using=using, 7010 on=on, 7011 expressions=self._parse_when_matched(), 7012 returning=self._parse_returning(), 7013 ) 7014 7015 def _parse_when_matched(self) -> t.List[exp.When]: 7016 whens = [] 7017 7018 while self._match(TokenType.WHEN): 7019 matched = not self._match(TokenType.NOT) 7020 self._match_text_seq("MATCHED") 7021 source = ( 7022 False 7023 if self._match_text_seq("BY", "TARGET") 7024 else self._match_text_seq("BY", "SOURCE") 7025 ) 7026 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7027 7028 self._match(TokenType.THEN) 7029 7030 if self._match(TokenType.INSERT): 7031 this = self._parse_star() 7032 if this: 7033 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7034 else: 7035 then = self.expression( 7036 exp.Insert, 7037 this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(), 7038 expression=self._match_text_seq("VALUES") and self._parse_value(), 7039 ) 7040 elif self._match(TokenType.UPDATE): 7041 expressions = self._parse_star() 7042 if expressions: 7043 then = self.expression(exp.Update, expressions=expressions) 7044 else: 7045 then = self.expression( 7046 exp.Update, 7047 expressions=self._match(TokenType.SET) 7048 and self._parse_csv(self._parse_equality), 7049 ) 7050 elif self._match(TokenType.DELETE): 7051 then = self.expression(exp.Var, this=self._prev.text) 7052 else: 7053 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7054 7055 whens.append( 7056 self.expression( 7057 exp.When, 7058 matched=matched, 7059 source=source, 7060 condition=condition, 7061 then=then, 7062 ) 7063 ) 7064 return whens 7065 7066 def _parse_show(self) -> t.Optional[exp.Expression]: 7067 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7068 if parser: 7069 return parser(self) 7070 return self._parse_as_command(self._prev) 7071 7072 def _parse_set_item_assignment( 7073 self, kind: t.Optional[str] = None 7074 ) -> t.Optional[exp.Expression]: 7075 index = self._index 7076 7077 if kind in ("GLOBAL", "SESSION") and 
self._match_text_seq("TRANSACTION"): 7078 return self._parse_set_transaction(global_=kind == "GLOBAL") 7079 7080 left = self._parse_primary() or self._parse_column() 7081 assignment_delimiter = self._match_texts(("=", "TO")) 7082 7083 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7084 self._retreat(index) 7085 return None 7086 7087 right = self._parse_statement() or self._parse_id_var() 7088 if isinstance(right, (exp.Column, exp.Identifier)): 7089 right = exp.var(right.name) 7090 7091 this = self.expression(exp.EQ, this=left, expression=right) 7092 return self.expression(exp.SetItem, this=this, kind=kind) 7093 7094 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7095 self._match_text_seq("TRANSACTION") 7096 characteristics = self._parse_csv( 7097 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7098 ) 7099 return self.expression( 7100 exp.SetItem, 7101 expressions=characteristics, 7102 kind="TRANSACTION", 7103 **{"global": global_}, # type: ignore 7104 ) 7105 7106 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7107 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7108 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7109 7110 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7111 index = self._index 7112 set_ = self.expression( 7113 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7114 ) 7115 7116 if self._curr: 7117 self._retreat(index) 7118 return self._parse_as_command(self._prev) 7119 7120 return set_ 7121 7122 def _parse_var_from_options( 7123 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7124 ) -> t.Optional[exp.Var]: 7125 start = self._curr 7126 if not start: 7127 return None 7128 7129 option = start.text.upper() 7130 continuations = options.get(option) 7131 7132 index = self._index 7133 self._advance() 7134 for keywords in continuations or []: 7135 if isinstance(keywords, str): 7136 keywords = (keywords,) 7137 7138 if self._match_text_seq(*keywords): 7139 option = f"{option} {' '.join(keywords)}" 7140 break 7141 else: 7142 if continuations or continuations is None: 7143 if raise_unmatched: 7144 self.raise_error(f"Unknown option {option}") 7145 7146 self._retreat(index) 7147 return None 7148 7149 return exp.var(option) 7150 7151 def _parse_as_command(self, start: Token) -> exp.Command: 7152 while self._curr: 7153 self._advance() 7154 text = self._find_sql(start, self._prev) 7155 size = len(start.text) 7156 self._warn_unsupported() 7157 return exp.Command(this=text[:size], expression=text[size:]) 7158 7159 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7160 settings = [] 7161 7162 self._match_l_paren() 7163 kind = self._parse_id_var() 7164 7165 if self._match(TokenType.L_PAREN): 7166 while True: 7167 key = self._parse_id_var() 7168 value = self._parse_primary() 7169 if not key and value is None: 7170 break 7171 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7172 self._match(TokenType.R_PAREN) 7173 7174 self._match_r_paren() 7175 7176 return self.expression( 7177 exp.DictProperty, 7178 this=this, 7179 kind=kind.this if kind else None, 7180 settings=settings, 7181 ) 7182 7183 def _parse_dict_range(self, this: str) -> exp.DictRange: 7184 self._match_l_paren() 7185 has_min = self._match_text_seq("MIN") 7186 if has_min: 7187 min = self._parse_var() or self._parse_primary() 7188 self._match_text_seq("MAX") 7189 max = 
self._parse_var() or self._parse_primary() 7190 else: 7191 max = self._parse_var() or self._parse_primary() 7192 min = exp.Literal.number(0) 7193 self._match_r_paren() 7194 return self.expression(exp.DictRange, this=this, min=min, max=max) 7195 7196 def _parse_comprehension( 7197 self, this: t.Optional[exp.Expression] 7198 ) -> t.Optional[exp.Comprehension]: 7199 index = self._index 7200 expression = self._parse_column() 7201 if not self._match(TokenType.IN): 7202 self._retreat(index - 1) 7203 return None 7204 iterator = self._parse_column() 7205 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7206 return self.expression( 7207 exp.Comprehension, 7208 this=this, 7209 expression=expression, 7210 iterator=iterator, 7211 condition=condition, 7212 ) 7213 7214 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7215 if self._match(TokenType.HEREDOC_STRING): 7216 return self.expression(exp.Heredoc, this=self._prev.text) 7217 7218 if not self._match_text_seq("$"): 7219 return None 7220 7221 tags = ["$"] 7222 tag_text = None 7223 7224 if self._is_connected(): 7225 self._advance() 7226 tags.append(self._prev.text.upper()) 7227 else: 7228 self.raise_error("No closing $ found") 7229 7230 if tags[-1] != "$": 7231 if self._is_connected() and self._match_text_seq("$"): 7232 tag_text = tags[-1] 7233 tags.append("$") 7234 else: 7235 self.raise_error("No closing $ found") 7236 7237 heredoc_start = self._curr 7238 7239 while self._curr: 7240 if self._match_text_seq(*tags, advance=False): 7241 this = self._find_sql(heredoc_start, self._prev) 7242 self._advance(len(tags)) 7243 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7244 7245 self._advance() 7246 7247 self.raise_error(f"No closing {''.join(tags)} found") 7248 return None 7249 7250 def _find_parser( 7251 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7252 ) -> t.Optional[t.Callable]: 7253 if not self._curr: 7254 return None 7255 7256 index = self._index 7257 this = [] 7258 while True: 7259 # The current token might be multiple words 7260 curr = self._curr.text.upper() 7261 key = curr.split(" ") 7262 this.append(curr) 7263 7264 self._advance() 7265 result, trie = in_trie(trie, key) 7266 if result == TrieResult.FAILED: 7267 break 7268 7269 if result == TrieResult.EXISTS: 7270 subparser = parsers[" ".join(this)] 7271 return subparser 7272 7273 self._retreat(index) 7274 return None 7275 7276 def _match(self, token_type, advance=True, expression=None): 7277 if not self._curr: 7278 return None 7279 7280 if self._curr.token_type == token_type: 7281 if advance: 7282 self._advance() 7283 self._add_comments(expression) 7284 return True 7285 7286 return None 7287 7288 def _match_set(self, types, advance=True): 7289 if not self._curr: 7290 return None 7291 7292 if self._curr.token_type in types: 7293 if advance: 7294 self._advance() 7295 return True 7296 7297 return None 7298 7299 def _match_pair(self, token_type_a, token_type_b, advance=True): 7300 if not self._curr or not self._next: 7301 return None 7302 7303 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7304 if advance: 7305 self._advance(2) 7306 return True 7307 7308 return None 7309 7310 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7311 if not self._match(TokenType.L_PAREN, expression=expression): 7312 self.raise_error("Expecting (") 7313 7314 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7315 if not self._match(TokenType.R_PAREN, expression=expression): 
7316 self.raise_error("Expecting )") 7317 7318 def _match_texts(self, texts, advance=True): 7319 if ( 7320 self._curr 7321 and self._curr.token_type != TokenType.STRING 7322 and self._curr.text.upper() in texts 7323 ): 7324 if advance: 7325 self._advance() 7326 return True 7327 return None 7328 7329 def _match_text_seq(self, *texts, advance=True): 7330 index = self._index 7331 for text in texts: 7332 if ( 7333 self._curr 7334 and self._curr.token_type != TokenType.STRING 7335 and self._curr.text.upper() == text 7336 ): 7337 self._advance() 7338 else: 7339 self._retreat(index) 7340 return None 7341 7342 if not advance: 7343 self._retreat(index) 7344 7345 return True 7346 7347 def _replace_lambda( 7348 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7349 ) -> t.Optional[exp.Expression]: 7350 if not node: 7351 return node 7352 7353 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7354 7355 for column in node.find_all(exp.Column): 7356 typ = lambda_types.get(column.parts[0].name) 7357 if typ is not None: 7358 dot_or_id = column.to_dot() if column.table else column.this 7359 7360 if typ: 7361 dot_or_id = self.expression( 7362 exp.Cast, 7363 this=dot_or_id, 7364 to=typ, 7365 ) 7366 7367 parent = column.parent 7368 7369 while isinstance(parent, exp.Dot): 7370 if not isinstance(parent.parent, exp.Dot): 7371 parent.replace(dot_or_id) 7372 break 7373 parent = parent.parent 7374 else: 7375 if column is node: 7376 node = dot_or_id 7377 else: 7378 column.replace(dot_or_id) 7379 return node 7380 7381 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7382 start = self._prev 7383 7384 # Not to be confused with TRUNCATE(number, decimals) function call 7385 if self._match(TokenType.L_PAREN): 7386 self._retreat(self._index - 2) 7387 return self._parse_function() 7388 7389 # Clickhouse supports TRUNCATE DATABASE as well 7390 is_database = self._match(TokenType.DATABASE) 7391 7392 self._match(TokenType.TABLE) 7393 7394 exists = self._parse_exists(not_=False) 7395 7396 expressions = self._parse_csv( 7397 lambda: self._parse_table(schema=True, is_db_reference=is_database) 7398 ) 7399 7400 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7401 7402 if self._match_text_seq("RESTART", "IDENTITY"): 7403 identity = "RESTART" 7404 elif self._match_text_seq("CONTINUE", "IDENTITY"): 7405 identity = "CONTINUE" 7406 else: 7407 identity = None 7408 7409 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7410 option = self._prev.text 7411 else: 7412 option = None 7413 7414 partition = self._parse_partition() 7415 7416 # Fallback case 7417 if self._curr: 7418 return self._parse_as_command(start) 7419 7420 return self.expression( 7421 exp.TruncateTable, 7422 expressions=expressions, 7423 is_database=is_database, 7424 exists=exists, 7425 cluster=cluster, 7426 identity=identity, 7427 option=option, 7428 partition=partition, 7429 ) 7430 7431 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7432 this = self._parse_ordered(self._parse_opclass) 7433 7434 if not self._match(TokenType.WITH): 7435 return this 7436 7437 op = self._parse_var(any_token=True) 7438 7439 return self.expression(exp.WithOperator, this=this, op=op) 7440 7441 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 7442 self._match(TokenType.EQ) 7443 self._match(TokenType.L_PAREN) 7444 7445 opts: t.List[t.Optional[exp.Expression]] = [] 7446 while self._curr and not self._match(TokenType.R_PAREN): 7447 if 
self._match_text_seq("FORMAT_NAME", "="): 7448 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 7449 # so we parse it separately to use _parse_field() 7450 prop = self.expression( 7451 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 7452 ) 7453 opts.append(prop) 7454 else: 7455 opts.append(self._parse_property()) 7456 7457 self._match(TokenType.COMMA) 7458 7459 return opts 7460 7461 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 7462 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 7463 7464 options = [] 7465 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 7466 option = self._parse_var(any_token=True) 7467 prev = self._prev.text.upper() 7468 7469 # Different dialects might separate options and values by white space, "=" and "AS" 7470 self._match(TokenType.EQ) 7471 self._match(TokenType.ALIAS) 7472 7473 param = self.expression(exp.CopyParameter, this=option) 7474 7475 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 7476 TokenType.L_PAREN, advance=False 7477 ): 7478 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 7479 param.set("expressions", self._parse_wrapped_options()) 7480 elif prev == "FILE_FORMAT": 7481 # T-SQL's external file format case 7482 param.set("expression", self._parse_field()) 7483 else: 7484 param.set("expression", self._parse_unquoted_field()) 7485 7486 options.append(param) 7487 self._match(sep) 7488 7489 return options 7490 7491 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 7492 expr = self.expression(exp.Credentials) 7493 7494 if self._match_text_seq("STORAGE_INTEGRATION", "="): 7495 expr.set("storage", self._parse_field()) 7496 if self._match_text_seq("CREDENTIALS"): 7497 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 7498 creds = ( 7499 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 7500 ) 7501 expr.set("credentials", creds) 7502 if self._match_text_seq("ENCRYPTION"): 7503 expr.set("encryption", self._parse_wrapped_options()) 7504 if self._match_text_seq("IAM_ROLE"): 7505 expr.set("iam_role", self._parse_field()) 7506 if self._match_text_seq("REGION"): 7507 expr.set("region", self._parse_field()) 7508 7509 return expr 7510 7511 def _parse_file_location(self) -> t.Optional[exp.Expression]: 7512 return self._parse_field() 7513 7514 def _parse_copy(self) -> exp.Copy | exp.Command: 7515 start = self._prev 7516 7517 self._match(TokenType.INTO) 7518 7519 this = ( 7520 self._parse_select(nested=True, parse_subquery_alias=False) 7521 if self._match(TokenType.L_PAREN, advance=False) 7522 else self._parse_table(schema=True) 7523 ) 7524 7525 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 7526 7527 files = self._parse_csv(self._parse_file_location) 7528 credentials = self._parse_credentials() 7529 7530 self._match_text_seq("WITH") 7531 7532 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 7533 7534 # Fallback case 7535 if self._curr: 7536 return self._parse_as_command(start) 7537 7538 return self.expression( 7539 exp.Copy, 7540 this=this, 7541 kind=kind, 7542 credentials=credentials, 7543 files=files, 7544 params=params, 7545 ) 7546 7547 def _parse_normalize(self) -> exp.Normalize: 7548 return self.expression( 7549 exp.Normalize, 7550 this=self._parse_bitwise(), 7551 form=self._match(TokenType.COMMA) and self._parse_var(), 7552 ) 7553 7554 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 7555 if 
self._match_text_seq("COLUMNS", "(", advance=False): 7556 this = self._parse_function() 7557 if isinstance(this, exp.Columns): 7558 this.set("unpack", True) 7559 return this 7560 7561 return self.expression( 7562 exp.Star, 7563 **{ # type: ignore 7564 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 7565 "replace": self._parse_star_op("REPLACE"), 7566 "rename": self._parse_star_op("RENAME"), 7567 }, 7568 ) 7569 7570 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 7571 privilege_parts = [] 7572 7573 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 7574 # (end of privilege list) or L_PAREN (start of column list) are met 7575 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 7576 privilege_parts.append(self._curr.text.upper()) 7577 self._advance() 7578 7579 this = exp.var(" ".join(privilege_parts)) 7580 expressions = ( 7581 self._parse_wrapped_csv(self._parse_column) 7582 if self._match(TokenType.L_PAREN, advance=False) 7583 else None 7584 ) 7585 7586 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 7587 7588 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 7589 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 7590 principal = self._parse_id_var() 7591 7592 if not principal: 7593 return None 7594 7595 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 7596 7597 def _parse_grant(self) -> exp.Grant | exp.Command: 7598 start = self._prev 7599 7600 privileges = self._parse_csv(self._parse_grant_privilege) 7601 7602 self._match(TokenType.ON) 7603 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 7604 7605 # Attempt to parse the securable e.g. MySQL allows names 7606 # such as "foo.*", "*.*" which are not easily parseable yet 7607 securable = self._try_parse(self._parse_table_parts) 7608 7609 if not securable or not self._match_text_seq("TO"): 7610 return self._parse_as_command(start) 7611 7612 principals = self._parse_csv(self._parse_grant_principal) 7613 7614 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 7615 7616 if self._curr: 7617 return self._parse_as_command(start) 7618 7619 return self.expression( 7620 exp.Grant, 7621 privileges=privileges, 7622 kind=kind, 7623 securable=securable, 7624 principals=principals, 7625 grant_option=grant_option, 7626 ) 7627 7628 def _parse_overlay(self) -> exp.Overlay: 7629 return self.expression( 7630 exp.Overlay, 7631 **{ # type: ignore 7632 "this": self._parse_bitwise(), 7633 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 7634 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 7635 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 7636 }, 7637 )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
1393 def __init__( 1394 self, 1395 error_level: t.Optional[ErrorLevel] = None, 1396 error_message_context: int = 100, 1397 max_errors: int = 3, 1398 dialect: DialectType = None, 1399 ): 1400 from sqlglot.dialects import Dialect 1401 1402 self.error_level = error_level or ErrorLevel.IMMEDIATE 1403 self.error_message_context = error_message_context 1404 self.max_errors = max_errors 1405 self.dialect = Dialect.get_or_raise(dialect) 1406 self.reset()
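A minimal sketch of driving the Parser directly with these constructor options; most callers go through sqlglot.parse or sqlglot.parse_one instead, which wire the tokenizer and dialect together:

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    # Raise a ParseError aggregating up to max_errors messages, showing
    # 50 characters of context around each offending token.
    parser = Parser(error_level=ErrorLevel.RAISE, error_message_context=50, max_errors=3)
    tokens = Tokenizer().tokenize("SELECT 1")
    print(parser.parse(tokens, sql="SELECT 1")[0].sql())  # SELECT 1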
1418 def parse( 1419 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1420 ) -> t.List[t.Optional[exp.Expression]]: 1421 """ 1422 Parses a list of tokens and returns a list of syntax trees, one tree 1423 per parsed SQL statement. 1424 1425 Args: 1426 raw_tokens: The list of tokens. 1427 sql: The original SQL string, used to produce helpful debug messages. 1428 1429 Returns: 1430 The list of the produced syntax trees. 1431 """ 1432 return self._parse( 1433 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1434 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
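For example (a sketch; statements are split on semicolons, yielding one tree each):

    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT a FROM t; UPDATE t SET a = 1"
    trees = Parser().parse(Tokenizer().tokenize(sql), sql=sql)
    print([type(tree).__name__ for tree in trees])  # e.g. ['Select', 'Update']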
1436 def parse_into( 1437 self, 1438 expression_types: exp.IntoType, 1439 raw_tokens: t.List[Token], 1440 sql: t.Optional[str] = None, 1441 ) -> t.List[t.Optional[exp.Expression]]: 1442 """ 1443 Parses a list of tokens into a given Expression type. If a collection of Expression 1444 types is given instead, this method will try to parse the token list into each one 1445 of them, stopping at the first for which the parsing succeeds. 1446 1447 Args: 1448 expression_types: The expression type(s) to try and parse the token list into. 1449 raw_tokens: The list of tokens. 1450 sql: The original SQL string, used to produce helpful debug messages. 1451 1452 Returns: 1453 The target Expression. 1454 """ 1455 errors = [] 1456 for expression_type in ensure_list(expression_types): 1457 parser = self.EXPRESSION_PARSERS.get(expression_type) 1458 if not parser: 1459 raise TypeError(f"No parser registered for {expression_type}") 1460 1461 try: 1462 return self._parse(parser, raw_tokens, sql) 1463 except ParseError as e: 1464 e.errors[0]["into_expression"] = expression_type 1465 errors.append(e) 1466 1467 raise ParseError( 1468 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1469 errors=merge_errors(errors), 1470 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
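A sketch of parse_into, assuming exp.Select is among the expression types registered in EXPRESSION_PARSERS (a TypeError is raised for unregistered types, and a ParseError if every candidate type fails to parse):

    from sqlglot import exp
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT a FROM t"
    # Force the token stream to be parsed as a SELECT statement specifically.
    trees = Parser().parse_into(exp.Select, Tokenizer().tokenize(sql), sql=sql)
    print(trees[0].sql())  # SELECT a FROM t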
1510 def check_errors(self) -> None: 1511 """Logs or raises any found errors, depending on the chosen error level setting.""" 1512 if self.error_level == ErrorLevel.WARN: 1513 for error in self.errors: 1514 logger.error(str(error)) 1515 elif self.error_level == ErrorLevel.RAISE and self.errors: 1516 raise ParseError( 1517 concat_messages(self.errors, self.max_errors), 1518 errors=merge_errors(self.errors), 1519 )
Logs or raises any found errors, depending on the chosen error level setting.
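For instance, with ErrorLevel.WARN the errors accumulated during parsing are logged rather than raised (a sketch; the exact error text depends on the failing construct):

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT 1 +"  # incomplete expression
    parser = Parser(error_level=ErrorLevel.WARN)
    parser.parse(Tokenizer().tokenize(sql), sql=sql)  # logs instead of raising
    print(len(parser.errors) > 0)  # True: errors were recorded, not raised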
1521 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1522 """ 1523 Appends an error in the list of recorded errors or raises it, depending on the chosen 1524 error level setting. 1525 """ 1526 token = token or self._curr or self._prev or Token.string("") 1527 start = token.start 1528 end = token.end + 1 1529 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1530 highlight = self.sql[start:end] 1531 end_context = self.sql[end : end + self.error_message_context] 1532 1533 error = ParseError.new( 1534 f"{message}. Line {token.line}, Col: {token.col}.\n" 1535 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1536 description=message, 1537 line=token.line, 1538 col=token.col, 1539 start_context=start_context, 1540 highlight=highlight, 1541 end_context=end_context, 1542 ) 1543 1544 if self.error_level == ErrorLevel.IMMEDIATE: 1545 raise error 1546 1547 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
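The resulting ParseError carries structured context, so the failure location can be inspected programmatically (a sketch; exact messages vary):

    from sqlglot import parse_one
    from sqlglot.errors import ParseError

    try:
        parse_one("SELECT 1 +")  # the default error level raises immediately
    except ParseError as e:
        err = e.errors[0]
        print(err["line"], err["col"], err["highlight"])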
1549 def expression( 1550 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1551 ) -> E: 1552 """ 1553 Creates a new, validated Expression. 1554 1555 Args: 1556 exp_class: The expression class to instantiate. 1557 comments: An optional list of comments to attach to the expression. 1558 kwargs: The arguments to set for the expression along with their respective values. 1559 1560 Returns: 1561 The target expression. 1562 """ 1563 instance = exp_class(**kwargs) 1564 instance.add_comments(comments) if comments else self._add_comments(instance) 1565 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
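A sketch of building a node through expression(), so that validation and comment attachment stay consistent with parser-produced nodes:

    from sqlglot import exp
    from sqlglot.parser import Parser

    node = Parser().expression(exp.EQ, this=exp.column("a"), expression=exp.Literal.number(1))
    print(node.sql())  # a = 1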
1572 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1573 """ 1574 Validates an Expression, making sure that all its mandatory arguments are set. 1575 1576 Args: 1577 expression: The expression to validate. 1578 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1579 1580 Returns: 1581 The validated expression. 1582 """ 1583 if self.error_level != ErrorLevel.IGNORE: 1584 for error_message in expression.error_messages(args): 1585 self.raise_error(error_message) 1586 1587 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
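Validation failures are routed through raise_error, so they respect the configured error level; a sketch:

    from sqlglot import exp
    from sqlglot.errors import ErrorLevel, ParseError
    from sqlglot.parser import Parser

    incomplete = exp.EQ(this=exp.column("a"))  # mandatory 'expression' arg is missing

    try:
        Parser(error_level=ErrorLevel.IMMEDIATE).validate_expression(incomplete)
    except ParseError as e:
        print(e)  # roughly: Required keyword: 'expression' missing for <class ...EQ>

    # With ErrorLevel.IGNORE the same node passes through unvalidated.
    print(Parser(error_level=ErrorLevel.IGNORE).validate_expression(incomplete) is incomplete)  # True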