sqlglot.parser
from __future__ import annotations

import logging
import typing as t
import itertools
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")
    TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor)

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)
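The wrapping above matters because MOD is typically rendered with the infix `%` operator, which binds tighter than `+`. A minimal illustration via the public API (the rendered output shown in the comment is indicative):

import sqlglot

# build_mod parenthesizes the binary addition so that rendering with `%`
# preserves the original grouping.
print(sqlglot.parse_one("MOD(a + 1, 7)").sql())  # e.g. "(a + 1) % 7"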
def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True):
    return exp.Trim(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position="LEADING" if is_left else "TRAILING",
    )


def build_coalesce(args: t.List, is_nvl: t.Optional[bool] = None) -> exp.Coalesce:
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl)


def build_locate_strposition(args: t.List):
    return exp.StrPosition(
        this=seq_get(args, 1),
        substr=seq_get(args, 0),
        position=seq_get(args, 2),
    )


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass
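The metaclass precomputes word tries so that multi-word SHOW/SET variants can be matched token by token at parse time. A rough sketch of how such a trie behaves; the two SHOW keys below are made up for illustration, since the base parser's SHOW_PARSERS is empty:

from sqlglot.trie import TrieResult, in_trie, new_trie

# Hypothetical keys, mirroring how SHOW_TRIE is built from SHOW_PARSERS.
trie = new_trie([["SHOW", "TABLES"], ["SHOW", "COLUMNS"]])
assert in_trie(trie, ["SHOW"])[0] == TrieResult.PREFIX           # partial match
assert in_trie(trie, ["SHOW", "TABLES"])[0] == TrieResult.EXISTS  # full match
assert in_trie(trie, ["DESCRIBE"])[0] == TrieResult.FAILED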
class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "CHAR": lambda args: exp.Chr(expressions=args),
        "CHR": lambda args: exp.Chr(expressions=args),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "STRPOS": exp.StrPosition.from_arg_list,
        "CHARINDEX": lambda args: build_locate_strposition(args),
        "INSTR": exp.StrPosition.from_arg_list,
        "LOCATE": lambda args: build_locate_strposition(args),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }
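Function names are resolved through this table at parse time, so several spellings can map to one canonical node. A small illustrative check:

import sqlglot
from sqlglot import exp

# COALESCE, IFNULL and NVL all route to build_coalesce and yield exp.Coalesce.
assert isinstance(sqlglot.parse_one("COALESCE(a, b)"), exp.Coalesce)
assert isinstance(sqlglot.parse_one("IFNULL(a, b)"), exp.Coalesce)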
    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.DYNAMIC,
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.UDOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.BLOB,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME2,
        TokenType.DATETIME64,
        TokenType.SMALLDATETIME,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.DECIMAL256,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.POINT,
        TokenType.RING,
        TokenType.LINESTRING,
        TokenType.MULTILINESTRING,
        TokenType.POLYGON,
        TokenType.MULTIPOLYGON,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NOTHING,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        TokenType.DYNAMIC,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }
    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
        TokenType.DOUBLE: TokenType.UDOUBLE,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.FILE_FORMAT,
        TokenType.MODEL,
        TokenType.NAMESPACE,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.SINK,
        TokenType.SOURCE,
        TokenType.STAGE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.STREAMLIT,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.ATTACH,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.CURRENT_SCHEMA,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DETACH,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.EXPORT,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LIMIT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.PUT,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
    ID_VAR_TOKENS.remove(TokenType.UNION)

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS
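Because these keyword tokens are accepted wherever identifiers can appear, many reserved-looking words parse as ordinary column or alias names. For example:

import sqlglot
from sqlglot import exp

# FORMAT is a keyword token, but it is in ID_VAR_TOKENS, so it works as a column name.
tree = sqlglot.parse_one("SELECT format FROM t")
assert isinstance(tree.selects[0], exp.Column)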
    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_SCHEMA,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPNTZ,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }
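These token-to-node tables drive a classic precedence-climbing cascade: equality is parsed above comparison, which sits above bitwise, term, and factor. As a result, multiplication binds tighter than addition. A quick illustration:

import sqlglot
from sqlglot import exp

tree = sqlglot.parse_one("a + b * c")
assert isinstance(tree, exp.Add)             # + is handled at the TERM level
assert isinstance(tree.expression, exp.Mul)  # * is handled below it, at the FACTOR level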
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DOTCOLON: lambda self, this, to: self.expression(
            exp.JSONCast,
            this=this,
            to=to,
        ),
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
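Each entry maps a token that may follow a column to a node builder: `::` becomes a cast (strict or try, per STRICT_CAST), and the arrow operators become JSON extraction. A sketch under the default dialect; arrow behavior varies across dialects:

import sqlglot
from sqlglot import exp

assert isinstance(sqlglot.parse_one("x::INT"), exp.Cast)
# In the default dialect, -> is routed through the ARROW entry above.
print(repr(sqlglot.parse_one("x -> '$.a'")))  # expected to be a JSONExtract node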
    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Hint: lambda self: self._parse_hint_body(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Tuple: lambda self: self._parse_value(values=False),
        exp.Whens: lambda self: self._parse_when_matched(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.ANALYZE: lambda self: self._parse_analyze(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self._parse_use(),
        TokenType.SEMICOLON: lambda self: exp.Semicolon(),
    }
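The first token of a statement selects its parser here; anything else falls through to expression parsing (see _parse_statement at the end of this module). For example:

import sqlglot
from sqlglot import exp

assert isinstance(sqlglot.parse_one("USE db"), exp.Use)
assert isinstance(sqlglot.parse_one("UPDATE t SET x = 1"), exp.Update)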
    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(
            exp.HexString,
            this=token.text,
            is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None,
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.COLON_PLACEHOLDER_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
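RANGE_PARSERS attach postfix predicates to an already-parsed left-hand side. For example:

import sqlglot
from sqlglot import exp

assert isinstance(sqlglot.parse_one("x BETWEEN 1 AND 10"), exp.Between)
assert isinstance(sqlglot.parse_one("x IN (1, 2)"), exp.In)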
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
        "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EMPTY": lambda self: self.expression(exp.EmptyProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SECURITY": lambda self: self._parse_security(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
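Property keywords encountered while parsing DDL are dispatched through this table. A sketch using MySQL's ENGINE property (dialect-specific, shown for illustration):

import sqlglot
from sqlglot import exp

tree = sqlglot.parse_one("CREATE TABLE t (a INT) ENGINE=InnoDB", read="mysql")
assert tree.find(exp.EngineProperty) is not None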
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WATERMARK": lambda self: self.expression(
            exp.WatermarkColumnConstraint,
            this=self._match(TokenType.FOR) and self._parse_column(),
            expression=self._match(TokenType.ALIAS) and self._parse_disjunction(),
        ),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
        "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
        "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
    }
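Column constraints are parsed by keyword through this table. For example:

import sqlglot
from sqlglot import exp

tree = sqlglot.parse_one("CREATE TABLE t (id INT PRIMARY KEY, name TEXT NOT NULL)")
assert tree.find(exp.PrimaryKeyColumnConstraint) is not None
assert tree.find(exp.NotNullColumnConstraint) is not None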
    def _parse_partitioned_by_bucket_or_truncate(self) -> exp.Expression:
        klass = (
            exp.PartitionedByBucket
            if self._prev.text.upper() == "BUCKET"
            else exp.PartitionByTruncate
        )

        args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column())
        this, expression = seq_get(args, 0), seq_get(args, 1)

        if isinstance(this, exp.Literal):
            # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order
            # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)`
            # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)`
            # Both variants are canonicalized into the latter, i.e. `bucket(<col name>, <num buckets>)`
            #
            # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning
            # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
            this, expression = expression, this

        return self.expression(klass, this=this, expression=expression)

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "SWAP": lambda self: self.expression(
            exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
        ),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
        "WATERMARK",
        "BUCKET",
        "TRUNCATE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)
1189 "DECODE": lambda self: self._parse_decode(), 1190 "EXTRACT": lambda self: self._parse_extract(), 1191 "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1192 "GAP_FILL": lambda self: self._parse_gap_fill(), 1193 "JSON_OBJECT": lambda self: self._parse_json_object(), 1194 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1195 "JSON_TABLE": lambda self: self._parse_json_table(), 1196 "MATCH": lambda self: self._parse_match_against(), 1197 "NORMALIZE": lambda self: self._parse_normalize(), 1198 "OPENJSON": lambda self: self._parse_open_json(), 1199 "OVERLAY": lambda self: self._parse_overlay(), 1200 "POSITION": lambda self: self._parse_position(), 1201 "PREDICT": lambda self: self._parse_predict(), 1202 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1203 "STRING_AGG": lambda self: self._parse_string_agg(), 1204 "SUBSTRING": lambda self: self._parse_substring(), 1205 "TRIM": lambda self: self._parse_trim(), 1206 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1207 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1208 "XMLELEMENT": lambda self: self.expression( 1209 exp.XMLElement, 1210 this=self._match_text_seq("NAME") and self._parse_id_var(), 1211 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1212 ), 1213 "XMLTABLE": lambda self: self._parse_xml_table(), 1214 } 1215 1216 QUERY_MODIFIER_PARSERS = { 1217 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1218 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1219 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1220 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1221 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1222 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1223 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1224 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1225 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1226 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1227 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1228 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1229 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1230 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1231 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1232 TokenType.CLUSTER_BY: lambda self: ( 1233 "cluster", 1234 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1235 ), 1236 TokenType.DISTRIBUTE_BY: lambda self: ( 1237 "distribute", 1238 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1239 ), 1240 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1241 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1242 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1243 } 1244 1245 SET_PARSERS = { 1246 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1247 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1248 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1249 "TRANSACTION": lambda self: self._parse_set_transaction(), 1250 } 1251 1252 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1253 1254 TYPE_LITERAL_PARSERS = { 1255 exp.DataType.Type.JSON: lambda self, this, _: 
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

    EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        "USING": (
            "BTREE",
            "HASH",
        ),
        **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}
    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    # The style options for the ANALYZE statement
    ANALYZE_STYLES = {
        "BUFFER_USAGE_LIMIT",
        "FULL",
        "LOCAL",
        "NO_WRITE_TO_BINLOG",
        "SAMPLE",
        "SKIP_LOCKED",
        "VERBOSE",
    }

    ANALYZE_EXPRESSION_PARSERS = {
        "ALL": lambda self: self._parse_analyze_columns(),
        "COMPUTE": lambda self: self._parse_analyze_statistics(),
        "DELETE": lambda self: self._parse_analyze_delete(),
        "DROP": lambda self: self._parse_analyze_histogram(),
        "ESTIMATE": lambda self: self._parse_analyze_statistics(),
        "LIST": lambda self: self._parse_analyze_list(),
        "PREDICATE": lambda self: self._parse_analyze_columns(),
        "UPDATE": lambda self: self._parse_analyze_histogram(),
        "VALIDATE": lambda self: self._parse_analyze_validate(),
    }

    PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}

    AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

    OPERATION_MODIFIERS: t.Set[str] = set()

    RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"}

    MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows)

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True
    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = True

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
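In practice the parser is fed tokens produced by a Tokenizer; sqlglot.parse_one wraps this plumbing, but the low-level flow looks roughly like the following sketch (default dialect assumed):

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM t"
tokens = Tokenizer().tokenize(sql)   # produce the raw token list
(tree,) = Parser().parse(tokens, sql)  # one tree per statement
print(tree.sql())  # SELECT a FROM t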
1562 """ 1563 errors = [] 1564 for expression_type in ensure_list(expression_types): 1565 parser = self.EXPRESSION_PARSERS.get(expression_type) 1566 if not parser: 1567 raise TypeError(f"No parser registered for {expression_type}") 1568 1569 try: 1570 return self._parse(parser, raw_tokens, sql) 1571 except ParseError as e: 1572 e.errors[0]["into_expression"] = expression_type 1573 errors.append(e) 1574 1575 raise ParseError( 1576 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1577 errors=merge_errors(errors), 1578 ) from errors[-1] 1579 1580 def _parse( 1581 self, 1582 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1583 raw_tokens: t.List[Token], 1584 sql: t.Optional[str] = None, 1585 ) -> t.List[t.Optional[exp.Expression]]: 1586 self.reset() 1587 self.sql = sql or "" 1588 1589 total = len(raw_tokens) 1590 chunks: t.List[t.List[Token]] = [[]] 1591 1592 for i, token in enumerate(raw_tokens): 1593 if token.token_type == TokenType.SEMICOLON: 1594 if token.comments: 1595 chunks.append([token]) 1596 1597 if i < total - 1: 1598 chunks.append([]) 1599 else: 1600 chunks[-1].append(token) 1601 1602 expressions = [] 1603 1604 for tokens in chunks: 1605 self._index = -1 1606 self._tokens = tokens 1607 self._advance() 1608 1609 expressions.append(parse_method(self)) 1610 1611 if self._index < len(self._tokens): 1612 self.raise_error("Invalid expression / Unexpected token") 1613 1614 self.check_errors() 1615 1616 return expressions 1617 1618 def check_errors(self) -> None: 1619 """Logs or raises any found errors, depending on the chosen error level setting.""" 1620 if self.error_level == ErrorLevel.WARN: 1621 for error in self.errors: 1622 logger.error(str(error)) 1623 elif self.error_level == ErrorLevel.RAISE and self.errors: 1624 raise ParseError( 1625 concat_messages(self.errors, self.max_errors), 1626 errors=merge_errors(self.errors), 1627 ) 1628 1629 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1630 """ 1631 Appends an error in the list of recorded errors or raises it, depending on the chosen 1632 error level setting. 1633 """ 1634 token = token or self._curr or self._prev or Token.string("") 1635 start = token.start 1636 end = token.end + 1 1637 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1638 highlight = self.sql[start:end] 1639 end_context = self.sql[end : end + self.error_message_context] 1640 1641 error = ParseError.new( 1642 f"{message}. Line {token.line}, Col: {token.col}.\n" 1643 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1644 description=message, 1645 line=token.line, 1646 col=token.col, 1647 start_context=start_context, 1648 highlight=highlight, 1649 end_context=end_context, 1650 ) 1651 1652 if self.error_level == ErrorLevel.IMMEDIATE: 1653 raise error 1654 1655 self.errors.append(error) 1656 1657 def expression( 1658 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1659 ) -> E: 1660 """ 1661 Creates a new, validated Expression. 1662 1663 Args: 1664 exp_class: The expression class to instantiate. 1665 comments: An optional list of comments to attach to the expression. 1666 kwargs: The arguments to set for the expression along with their respective values. 1667 1668 Returns: 1669 The target expression. 
1670 """ 1671 instance = exp_class(**kwargs) 1672 instance.add_comments(comments) if comments else self._add_comments(instance) 1673 return self.validate_expression(instance) 1674 1675 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1676 if expression and self._prev_comments: 1677 expression.add_comments(self._prev_comments) 1678 self._prev_comments = None 1679 1680 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1681 """ 1682 Validates an Expression, making sure that all its mandatory arguments are set. 1683 1684 Args: 1685 expression: The expression to validate. 1686 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1687 1688 Returns: 1689 The validated expression. 1690 """ 1691 if self.error_level != ErrorLevel.IGNORE: 1692 for error_message in expression.error_messages(args): 1693 self.raise_error(error_message) 1694 1695 return expression 1696 1697 def _find_sql(self, start: Token, end: Token) -> str: 1698 return self.sql[start.start : end.end + 1] 1699 1700 def _is_connected(self) -> bool: 1701 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1702 1703 def _advance(self, times: int = 1) -> None: 1704 self._index += times 1705 self._curr = seq_get(self._tokens, self._index) 1706 self._next = seq_get(self._tokens, self._index + 1) 1707 1708 if self._index > 0: 1709 self._prev = self._tokens[self._index - 1] 1710 self._prev_comments = self._prev.comments 1711 else: 1712 self._prev = None 1713 self._prev_comments = None 1714 1715 def _retreat(self, index: int) -> None: 1716 if index != self._index: 1717 self._advance(index - self._index) 1718 1719 def _warn_unsupported(self) -> None: 1720 if len(self._tokens) <= 1: 1721 return 1722 1723 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1724 # interested in emitting a warning for the one being currently processed. 1725 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1726 1727 logger.warning( 1728 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1729 ) 1730 1731 def _parse_command(self) -> exp.Command: 1732 self._warn_unsupported() 1733 return self.expression( 1734 exp.Command, 1735 comments=self._prev_comments, 1736 this=self._prev.text.upper(), 1737 expression=self._parse_string(), 1738 ) 1739 1740 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1741 """ 1742 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/except internally raises an error.
        This behavior can differ depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            comments = self._prev_comments
            stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
            stmt.add_comments(comments, prepend=True)
            return stmt

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
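    # Example: a minimal sketch (not part of the parser source) of driving
    # Parser.parse by hand with a dialect-produced token stream, mirroring what
    # sqlglot.parse() does internally. Only public APIs are used.
    #
    # from sqlglot.dialects import Dialect
    #
    # dialect = Dialect.get_or_raise("duckdb")
    # sql = "SELECT 1; SELECT 2"
    # for tree in dialect.parser().parse(dialect.tokenize(sql), sql):
    #     print(repr(tree))  # one syntax tree per semicolon-separated statement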
1852 1853 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1854 start = self._prev 1855 temporary = self._match(TokenType.TEMPORARY) 1856 materialized = self._match_text_seq("MATERIALIZED") 1857 1858 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1859 if not kind: 1860 return self._parse_as_command(start) 1861 1862 concurrently = self._match_text_seq("CONCURRENTLY") 1863 if_exists = exists or self._parse_exists() 1864 1865 if kind == "COLUMN": 1866 this = self._parse_column() 1867 else: 1868 this = self._parse_table_parts( 1869 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1870 ) 1871 1872 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1873 1874 if self._match(TokenType.L_PAREN, advance=False): 1875 expressions = self._parse_wrapped_csv(self._parse_types) 1876 else: 1877 expressions = None 1878 1879 return self.expression( 1880 exp.Drop, 1881 exists=if_exists, 1882 this=this, 1883 expressions=expressions, 1884 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1885 temporary=temporary, 1886 materialized=materialized, 1887 cascade=self._match_text_seq("CASCADE"), 1888 constraints=self._match_text_seq("CONSTRAINTS"), 1889 purge=self._match_text_seq("PURGE"), 1890 cluster=cluster, 1891 concurrently=concurrently, 1892 ) 1893 1894 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1895 return ( 1896 self._match_text_seq("IF") 1897 and (not not_ or self._match(TokenType.NOT)) 1898 and self._match(TokenType.EXISTS) 1899 ) 1900 1901 def _parse_create(self) -> exp.Create | exp.Command: 1902 # Note: this can't be None because we've matched a statement parser 1903 start = self._prev 1904 1905 replace = ( 1906 start.token_type == TokenType.REPLACE 1907 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1908 or self._match_pair(TokenType.OR, TokenType.ALTER) 1909 ) 1910 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1911 1912 unique = self._match(TokenType.UNIQUE) 1913 1914 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1915 clustered = True 1916 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1917 "COLUMNSTORE" 1918 ): 1919 clustered = False 1920 else: 1921 clustered = None 1922 1923 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1924 self._advance() 1925 1926 properties = None 1927 create_token = self._match_set(self.CREATABLES) and self._prev 1928 1929 if not create_token: 1930 # exp.Properties.Location.POST_CREATE 1931 properties = self._parse_properties() 1932 create_token = self._match_set(self.CREATABLES) and self._prev 1933 1934 if not properties or not create_token: 1935 return self._parse_as_command(start) 1936 1937 concurrently = self._match_text_seq("CONCURRENTLY") 1938 exists = self._parse_exists(not_=True) 1939 this = None 1940 expression: t.Optional[exp.Expression] = None 1941 indexes = None 1942 no_schema_binding = None 1943 begin = None 1944 end = None 1945 clone = None 1946 1947 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1948 nonlocal properties 1949 if properties and temp_props: 1950 properties.expressions.extend(temp_props.expressions) 1951 elif temp_props: 1952 properties = temp_props 1953 1954 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1955 this = self._parse_user_defined_function(kind=create_token.token_type) 1956 1957 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1958 
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_user_defined_function_expression()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            has_alias = self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            # Some dialects also support using a table as an alias instead of a SELECT.
            # Here we fall back to this as an alternative.
2019 if not expression and has_alias: 2020 expression = self._try_parse(self._parse_table_parts) 2021 2022 if create_token.token_type == TokenType.TABLE: 2023 # exp.Properties.Location.POST_EXPRESSION 2024 extend_props(self._parse_properties()) 2025 2026 indexes = [] 2027 while True: 2028 index = self._parse_index() 2029 2030 # exp.Properties.Location.POST_INDEX 2031 extend_props(self._parse_properties()) 2032 if not index: 2033 break 2034 else: 2035 self._match(TokenType.COMMA) 2036 indexes.append(index) 2037 elif create_token.token_type == TokenType.VIEW: 2038 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2039 no_schema_binding = True 2040 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2041 extend_props(self._parse_properties()) 2042 2043 shallow = self._match_text_seq("SHALLOW") 2044 2045 if self._match_texts(self.CLONE_KEYWORDS): 2046 copy = self._prev.text.lower() == "copy" 2047 clone = self.expression( 2048 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2049 ) 2050 2051 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2052 return self._parse_as_command(start) 2053 2054 create_kind_text = create_token.text.upper() 2055 return self.expression( 2056 exp.Create, 2057 this=this, 2058 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2059 replace=replace, 2060 refresh=refresh, 2061 unique=unique, 2062 expression=expression, 2063 exists=exists, 2064 properties=properties, 2065 indexes=indexes, 2066 no_schema_binding=no_schema_binding, 2067 begin=begin, 2068 end=end, 2069 clone=clone, 2070 concurrently=concurrently, 2071 clustered=clustered, 2072 ) 2073 2074 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2075 seq = exp.SequenceProperties() 2076 2077 options = [] 2078 index = self._index 2079 2080 while self._curr: 2081 self._match(TokenType.COMMA) 2082 if self._match_text_seq("INCREMENT"): 2083 self._match_text_seq("BY") 2084 self._match_text_seq("=") 2085 seq.set("increment", self._parse_term()) 2086 elif self._match_text_seq("MINVALUE"): 2087 seq.set("minvalue", self._parse_term()) 2088 elif self._match_text_seq("MAXVALUE"): 2089 seq.set("maxvalue", self._parse_term()) 2090 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2091 self._match_text_seq("=") 2092 seq.set("start", self._parse_term()) 2093 elif self._match_text_seq("CACHE"): 2094 # T-SQL allows empty CACHE which is initialized dynamically 2095 seq.set("cache", self._parse_number() or True) 2096 elif self._match_text_seq("OWNED", "BY"): 2097 # "OWNED BY NONE" is the default 2098 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2099 else: 2100 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2101 if opt: 2102 options.append(opt) 2103 else: 2104 break 2105 2106 seq.set("options", options if options else None) 2107 return None if self._index == index else seq 2108 2109 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2110 # only used for teradata currently 2111 self._match(TokenType.COMMA) 2112 2113 kwargs = { 2114 "no": self._match_text_seq("NO"), 2115 "dual": self._match_text_seq("DUAL"), 2116 "before": self._match_text_seq("BEFORE"), 2117 "default": self._match_text_seq("DEFAULT"), 2118 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2119 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2120 "after": self._match_text_seq("AFTER"), 2121 "minimum": 
self._match_texts(("MIN", "MINIMUM")), 2122 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2123 } 2124 2125 if self._match_texts(self.PROPERTY_PARSERS): 2126 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2127 try: 2128 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2129 except TypeError: 2130 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2131 2132 return None 2133 2134 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2135 return self._parse_wrapped_csv(self._parse_property) 2136 2137 def _parse_property(self) -> t.Optional[exp.Expression]: 2138 if self._match_texts(self.PROPERTY_PARSERS): 2139 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2140 2141 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2142 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2143 2144 if self._match_text_seq("COMPOUND", "SORTKEY"): 2145 return self._parse_sortkey(compound=True) 2146 2147 if self._match_text_seq("SQL", "SECURITY"): 2148 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2149 2150 index = self._index 2151 key = self._parse_column() 2152 2153 if not self._match(TokenType.EQ): 2154 self._retreat(index) 2155 return self._parse_sequence_properties() 2156 2157 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2158 if isinstance(key, exp.Column): 2159 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2160 2161 value = self._parse_bitwise() or self._parse_var(any_token=True) 2162 2163 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2164 if isinstance(value, exp.Column): 2165 value = exp.var(value.name) 2166 2167 return self.expression(exp.Property, this=key, value=value) 2168 2169 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2170 if self._match_text_seq("BY"): 2171 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2172 2173 self._match(TokenType.ALIAS) 2174 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2175 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2176 2177 return self.expression( 2178 exp.FileFormatProperty, 2179 this=( 2180 self.expression( 2181 exp.InputOutputFormat, 2182 input_format=input_format, 2183 output_format=output_format, 2184 ) 2185 if input_format or output_format 2186 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2187 ), 2188 ) 2189 2190 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2191 field = self._parse_field() 2192 if isinstance(field, exp.Identifier) and not field.quoted: 2193 field = exp.var(field) 2194 2195 return field 2196 2197 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2198 self._match(TokenType.EQ) 2199 self._match(TokenType.ALIAS) 2200 2201 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2202 2203 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2204 properties = [] 2205 while True: 2206 if before: 2207 prop = self._parse_property_before() 2208 else: 2209 prop = self._parse_property() 2210 if not prop: 2211 break 2212 for p in ensure_list(prop): 2213 properties.append(p) 2214 2215 if properties: 2216 return self.expression(exp.Properties, expressions=properties) 2217 2218 return None 2219 2220 
def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2221 return self.expression( 2222 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2223 ) 2224 2225 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2226 if self._match_texts(("DEFINER", "INVOKER")): 2227 security_specifier = self._prev.text.upper() 2228 return self.expression(exp.SecurityProperty, this=security_specifier) 2229 return None 2230 2231 def _parse_settings_property(self) -> exp.SettingsProperty: 2232 return self.expression( 2233 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2234 ) 2235 2236 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2237 if self._index >= 2: 2238 pre_volatile_token = self._tokens[self._index - 2] 2239 else: 2240 pre_volatile_token = None 2241 2242 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2243 return exp.VolatileProperty() 2244 2245 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2246 2247 def _parse_retention_period(self) -> exp.Var: 2248 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2249 number = self._parse_number() 2250 number_str = f"{number} " if number else "" 2251 unit = self._parse_var(any_token=True) 2252 return exp.var(f"{number_str}{unit}") 2253 2254 def _parse_system_versioning_property( 2255 self, with_: bool = False 2256 ) -> exp.WithSystemVersioningProperty: 2257 self._match(TokenType.EQ) 2258 prop = self.expression( 2259 exp.WithSystemVersioningProperty, 2260 **{ # type: ignore 2261 "on": True, 2262 "with": with_, 2263 }, 2264 ) 2265 2266 if self._match_text_seq("OFF"): 2267 prop.set("on", False) 2268 return prop 2269 2270 self._match(TokenType.ON) 2271 if self._match(TokenType.L_PAREN): 2272 while self._curr and not self._match(TokenType.R_PAREN): 2273 if self._match_text_seq("HISTORY_TABLE", "="): 2274 prop.set("this", self._parse_table_parts()) 2275 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2276 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2277 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2278 prop.set("retention_period", self._parse_retention_period()) 2279 2280 self._match(TokenType.COMMA) 2281 2282 return prop 2283 2284 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2285 self._match(TokenType.EQ) 2286 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2287 prop = self.expression(exp.DataDeletionProperty, on=on) 2288 2289 if self._match(TokenType.L_PAREN): 2290 while self._curr and not self._match(TokenType.R_PAREN): 2291 if self._match_text_seq("FILTER_COLUMN", "="): 2292 prop.set("filter_column", self._parse_column()) 2293 elif self._match_text_seq("RETENTION_PERIOD", "="): 2294 prop.set("retention_period", self._parse_retention_period()) 2295 2296 self._match(TokenType.COMMA) 2297 2298 return prop 2299 2300 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2301 kind = "HASH" 2302 expressions: t.Optional[t.List[exp.Expression]] = None 2303 if self._match_text_seq("BY", "HASH"): 2304 expressions = self._parse_wrapped_csv(self._parse_id_var) 2305 elif self._match_text_seq("BY", "RANDOM"): 2306 kind = "RANDOM" 2307 2308 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2309 buckets: t.Optional[exp.Expression] = None 2310 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2311 buckets = 
self._parse_number() 2312 2313 return self.expression( 2314 exp.DistributedByProperty, 2315 expressions=expressions, 2316 kind=kind, 2317 buckets=buckets, 2318 order=self._parse_order(), 2319 ) 2320 2321 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2322 self._match_text_seq("KEY") 2323 expressions = self._parse_wrapped_id_vars() 2324 return self.expression(expr_type, expressions=expressions) 2325 2326 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2327 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2328 prop = self._parse_system_versioning_property(with_=True) 2329 self._match_r_paren() 2330 return prop 2331 2332 if self._match(TokenType.L_PAREN, advance=False): 2333 return self._parse_wrapped_properties() 2334 2335 if self._match_text_seq("JOURNAL"): 2336 return self._parse_withjournaltable() 2337 2338 if self._match_texts(self.VIEW_ATTRIBUTES): 2339 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2340 2341 if self._match_text_seq("DATA"): 2342 return self._parse_withdata(no=False) 2343 elif self._match_text_seq("NO", "DATA"): 2344 return self._parse_withdata(no=True) 2345 2346 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2347 return self._parse_serde_properties(with_=True) 2348 2349 if self._match(TokenType.SCHEMA): 2350 return self.expression( 2351 exp.WithSchemaBindingProperty, 2352 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2353 ) 2354 2355 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2356 return self.expression( 2357 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2358 ) 2359 2360 if not self._next: 2361 return None 2362 2363 return self._parse_withisolatedloading() 2364 2365 def _parse_procedure_option(self) -> exp.Expression | None: 2366 if self._match_text_seq("EXECUTE", "AS"): 2367 return self.expression( 2368 exp.ExecuteAsProperty, 2369 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2370 or self._parse_string(), 2371 ) 2372 2373 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2374 2375 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2376 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2377 self._match(TokenType.EQ) 2378 2379 user = self._parse_id_var() 2380 self._match(TokenType.PARAMETER) 2381 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2382 2383 if not user or not host: 2384 return None 2385 2386 return exp.DefinerProperty(this=f"{user}@{host}") 2387 2388 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2389 self._match(TokenType.TABLE) 2390 self._match(TokenType.EQ) 2391 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2392 2393 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2394 return self.expression(exp.LogProperty, no=no) 2395 2396 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2397 return self.expression(exp.JournalProperty, **kwargs) 2398 2399 def _parse_checksum(self) -> exp.ChecksumProperty: 2400 self._match(TokenType.EQ) 2401 2402 on = None 2403 if self._match(TokenType.ON): 2404 on = True 2405 elif self._match_text_seq("OFF"): 2406 on = False 2407 2408 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2409 2410 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2411 return self.expression( 2412 exp.Cluster, 2413 expressions=( 2414 
self._parse_wrapped_csv(self._parse_ordered) 2415 if wrapped 2416 else self._parse_csv(self._parse_ordered) 2417 ), 2418 ) 2419 2420 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2421 self._match_text_seq("BY") 2422 2423 self._match_l_paren() 2424 expressions = self._parse_csv(self._parse_column) 2425 self._match_r_paren() 2426 2427 if self._match_text_seq("SORTED", "BY"): 2428 self._match_l_paren() 2429 sorted_by = self._parse_csv(self._parse_ordered) 2430 self._match_r_paren() 2431 else: 2432 sorted_by = None 2433 2434 self._match(TokenType.INTO) 2435 buckets = self._parse_number() 2436 self._match_text_seq("BUCKETS") 2437 2438 return self.expression( 2439 exp.ClusteredByProperty, 2440 expressions=expressions, 2441 sorted_by=sorted_by, 2442 buckets=buckets, 2443 ) 2444 2445 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2446 if not self._match_text_seq("GRANTS"): 2447 self._retreat(self._index - 1) 2448 return None 2449 2450 return self.expression(exp.CopyGrantsProperty) 2451 2452 def _parse_freespace(self) -> exp.FreespaceProperty: 2453 self._match(TokenType.EQ) 2454 return self.expression( 2455 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2456 ) 2457 2458 def _parse_mergeblockratio( 2459 self, no: bool = False, default: bool = False 2460 ) -> exp.MergeBlockRatioProperty: 2461 if self._match(TokenType.EQ): 2462 return self.expression( 2463 exp.MergeBlockRatioProperty, 2464 this=self._parse_number(), 2465 percent=self._match(TokenType.PERCENT), 2466 ) 2467 2468 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2469 2470 def _parse_datablocksize( 2471 self, 2472 default: t.Optional[bool] = None, 2473 minimum: t.Optional[bool] = None, 2474 maximum: t.Optional[bool] = None, 2475 ) -> exp.DataBlocksizeProperty: 2476 self._match(TokenType.EQ) 2477 size = self._parse_number() 2478 2479 units = None 2480 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2481 units = self._prev.text 2482 2483 return self.expression( 2484 exp.DataBlocksizeProperty, 2485 size=size, 2486 units=units, 2487 default=default, 2488 minimum=minimum, 2489 maximum=maximum, 2490 ) 2491 2492 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2493 self._match(TokenType.EQ) 2494 always = self._match_text_seq("ALWAYS") 2495 manual = self._match_text_seq("MANUAL") 2496 never = self._match_text_seq("NEVER") 2497 default = self._match_text_seq("DEFAULT") 2498 2499 autotemp = None 2500 if self._match_text_seq("AUTOTEMP"): 2501 autotemp = self._parse_schema() 2502 2503 return self.expression( 2504 exp.BlockCompressionProperty, 2505 always=always, 2506 manual=manual, 2507 never=never, 2508 default=default, 2509 autotemp=autotemp, 2510 ) 2511 2512 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2513 index = self._index 2514 no = self._match_text_seq("NO") 2515 concurrent = self._match_text_seq("CONCURRENT") 2516 2517 if not self._match_text_seq("ISOLATED", "LOADING"): 2518 self._retreat(index) 2519 return None 2520 2521 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2522 return self.expression( 2523 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2524 ) 2525 2526 def _parse_locking(self) -> exp.LockingProperty: 2527 if self._match(TokenType.TABLE): 2528 kind = "TABLE" 2529 elif self._match(TokenType.VIEW): 2530 kind = "VIEW" 2531 elif self._match(TokenType.ROW): 2532 kind = "ROW" 2533 elif 
self._match_text_seq("DATABASE"): 2534 kind = "DATABASE" 2535 else: 2536 kind = None 2537 2538 if kind in ("DATABASE", "TABLE", "VIEW"): 2539 this = self._parse_table_parts() 2540 else: 2541 this = None 2542 2543 if self._match(TokenType.FOR): 2544 for_or_in = "FOR" 2545 elif self._match(TokenType.IN): 2546 for_or_in = "IN" 2547 else: 2548 for_or_in = None 2549 2550 if self._match_text_seq("ACCESS"): 2551 lock_type = "ACCESS" 2552 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2553 lock_type = "EXCLUSIVE" 2554 elif self._match_text_seq("SHARE"): 2555 lock_type = "SHARE" 2556 elif self._match_text_seq("READ"): 2557 lock_type = "READ" 2558 elif self._match_text_seq("WRITE"): 2559 lock_type = "WRITE" 2560 elif self._match_text_seq("CHECKSUM"): 2561 lock_type = "CHECKSUM" 2562 else: 2563 lock_type = None 2564 2565 override = self._match_text_seq("OVERRIDE") 2566 2567 return self.expression( 2568 exp.LockingProperty, 2569 this=this, 2570 kind=kind, 2571 for_or_in=for_or_in, 2572 lock_type=lock_type, 2573 override=override, 2574 ) 2575 2576 def _parse_partition_by(self) -> t.List[exp.Expression]: 2577 if self._match(TokenType.PARTITION_BY): 2578 return self._parse_csv(self._parse_assignment) 2579 return [] 2580 2581 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2582 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2583 if self._match_text_seq("MINVALUE"): 2584 return exp.var("MINVALUE") 2585 if self._match_text_seq("MAXVALUE"): 2586 return exp.var("MAXVALUE") 2587 return self._parse_bitwise() 2588 2589 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2590 expression = None 2591 from_expressions = None 2592 to_expressions = None 2593 2594 if self._match(TokenType.IN): 2595 this = self._parse_wrapped_csv(self._parse_bitwise) 2596 elif self._match(TokenType.FROM): 2597 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2598 self._match_text_seq("TO") 2599 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2600 elif self._match_text_seq("WITH", "(", "MODULUS"): 2601 this = self._parse_number() 2602 self._match_text_seq(",", "REMAINDER") 2603 expression = self._parse_number() 2604 self._match_r_paren() 2605 else: 2606 self.raise_error("Failed to parse partition bound spec.") 2607 2608 return self.expression( 2609 exp.PartitionBoundSpec, 2610 this=this, 2611 expression=expression, 2612 from_expressions=from_expressions, 2613 to_expressions=to_expressions, 2614 ) 2615 2616 # https://www.postgresql.org/docs/current/sql-createtable.html 2617 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2618 if not self._match_text_seq("OF"): 2619 self._retreat(self._index - 1) 2620 return None 2621 2622 this = self._parse_table(schema=True) 2623 2624 if self._match(TokenType.DEFAULT): 2625 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2626 elif self._match_text_seq("FOR", "VALUES"): 2627 expression = self._parse_partition_bound_spec() 2628 else: 2629 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2630 2631 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2632 2633 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2634 self._match(TokenType.EQ) 2635 return self.expression( 2636 exp.PartitionedByProperty, 2637 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2638 ) 2639 2640 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2641 if self._match_text_seq("AND", "STATISTICS"): 2642 
statistics = True 2643 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2644 statistics = False 2645 else: 2646 statistics = None 2647 2648 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2649 2650 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2651 if self._match_text_seq("SQL"): 2652 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2653 return None 2654 2655 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2656 if self._match_text_seq("SQL", "DATA"): 2657 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2658 return None 2659 2660 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2661 if self._match_text_seq("PRIMARY", "INDEX"): 2662 return exp.NoPrimaryIndexProperty() 2663 if self._match_text_seq("SQL"): 2664 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2665 return None 2666 2667 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2668 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2669 return exp.OnCommitProperty() 2670 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2671 return exp.OnCommitProperty(delete=True) 2672 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2673 2674 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2675 if self._match_text_seq("SQL", "DATA"): 2676 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2677 return None 2678 2679 def _parse_distkey(self) -> exp.DistKeyProperty: 2680 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2681 2682 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2683 table = self._parse_table(schema=True) 2684 2685 options = [] 2686 while self._match_texts(("INCLUDING", "EXCLUDING")): 2687 this = self._prev.text.upper() 2688 2689 id_var = self._parse_id_var() 2690 if not id_var: 2691 return None 2692 2693 options.append( 2694 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2695 ) 2696 2697 return self.expression(exp.LikeProperty, this=table, expressions=options) 2698 2699 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2700 return self.expression( 2701 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2702 ) 2703 2704 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2705 self._match(TokenType.EQ) 2706 return self.expression( 2707 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2708 ) 2709 2710 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2711 self._match_text_seq("WITH", "CONNECTION") 2712 return self.expression( 2713 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2714 ) 2715 2716 def _parse_returns(self) -> exp.ReturnsProperty: 2717 value: t.Optional[exp.Expression] 2718 null = None 2719 is_table = self._match(TokenType.TABLE) 2720 2721 if is_table: 2722 if self._match(TokenType.LT): 2723 value = self.expression( 2724 exp.Schema, 2725 this="TABLE", 2726 expressions=self._parse_csv(self._parse_struct_types), 2727 ) 2728 if not self._match(TokenType.GT): 2729 self.raise_error("Expecting >") 2730 else: 2731 value = self._parse_schema(exp.var("TABLE")) 2732 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2733 null = True 2734 value = None 2735 else: 2736 value = self._parse_types() 2737 2738 return 
self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2739 2740 def _parse_describe(self) -> exp.Describe: 2741 kind = self._match_set(self.CREATABLES) and self._prev.text 2742 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2743 if self._match(TokenType.DOT): 2744 style = None 2745 self._retreat(self._index - 2) 2746 2747 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2748 2749 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2750 this = self._parse_statement() 2751 else: 2752 this = self._parse_table(schema=True) 2753 2754 properties = self._parse_properties() 2755 expressions = properties.expressions if properties else None 2756 partition = self._parse_partition() 2757 return self.expression( 2758 exp.Describe, 2759 this=this, 2760 style=style, 2761 kind=kind, 2762 expressions=expressions, 2763 partition=partition, 2764 format=format, 2765 ) 2766 2767 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2768 kind = self._prev.text.upper() 2769 expressions = [] 2770 2771 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2772 if self._match(TokenType.WHEN): 2773 expression = self._parse_disjunction() 2774 self._match(TokenType.THEN) 2775 else: 2776 expression = None 2777 2778 else_ = self._match(TokenType.ELSE) 2779 2780 if not self._match(TokenType.INTO): 2781 return None 2782 2783 return self.expression( 2784 exp.ConditionalInsert, 2785 this=self.expression( 2786 exp.Insert, 2787 this=self._parse_table(schema=True), 2788 expression=self._parse_derived_table_values(), 2789 ), 2790 expression=expression, 2791 else_=else_, 2792 ) 2793 2794 expression = parse_conditional_insert() 2795 while expression is not None: 2796 expressions.append(expression) 2797 expression = parse_conditional_insert() 2798 2799 return self.expression( 2800 exp.MultitableInserts, 2801 kind=kind, 2802 comments=comments, 2803 expressions=expressions, 2804 source=self._parse_table(), 2805 ) 2806 2807 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2808 comments = [] 2809 hint = self._parse_hint() 2810 overwrite = self._match(TokenType.OVERWRITE) 2811 ignore = self._match(TokenType.IGNORE) 2812 local = self._match_text_seq("LOCAL") 2813 alternative = None 2814 is_function = None 2815 2816 if self._match_text_seq("DIRECTORY"): 2817 this: t.Optional[exp.Expression] = self.expression( 2818 exp.Directory, 2819 this=self._parse_var_or_string(), 2820 local=local, 2821 row_format=self._parse_row_format(match_row=True), 2822 ) 2823 else: 2824 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2825 comments += ensure_list(self._prev_comments) 2826 return self._parse_multitable_inserts(comments) 2827 2828 if self._match(TokenType.OR): 2829 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2830 2831 self._match(TokenType.INTO) 2832 comments += ensure_list(self._prev_comments) 2833 self._match(TokenType.TABLE) 2834 is_function = self._match(TokenType.FUNCTION) 2835 2836 this = ( 2837 self._parse_table(schema=True, parse_partition=True) 2838 if not is_function 2839 else self._parse_function() 2840 ) 2841 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2842 this.set("alias", self._parse_table_alias()) 2843 2844 returning = self._parse_returning() 2845 2846 return self.expression( 2847 exp.Insert, 2848 comments=comments, 2849 hint=hint, 2850 is_function=is_function, 2851 this=this, 
2852 stored=self._match_text_seq("STORED") and self._parse_stored(), 2853 by_name=self._match_text_seq("BY", "NAME"), 2854 exists=self._parse_exists(), 2855 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2856 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2857 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2858 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2859 conflict=self._parse_on_conflict(), 2860 returning=returning or self._parse_returning(), 2861 overwrite=overwrite, 2862 alternative=alternative, 2863 ignore=ignore, 2864 source=self._match(TokenType.TABLE) and self._parse_table(), 2865 ) 2866 2867 def _parse_kill(self) -> exp.Kill: 2868 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2869 2870 return self.expression( 2871 exp.Kill, 2872 this=self._parse_primary(), 2873 kind=kind, 2874 ) 2875 2876 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2877 conflict = self._match_text_seq("ON", "CONFLICT") 2878 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2879 2880 if not conflict and not duplicate: 2881 return None 2882 2883 conflict_keys = None 2884 constraint = None 2885 2886 if conflict: 2887 if self._match_text_seq("ON", "CONSTRAINT"): 2888 constraint = self._parse_id_var() 2889 elif self._match(TokenType.L_PAREN): 2890 conflict_keys = self._parse_csv(self._parse_id_var) 2891 self._match_r_paren() 2892 2893 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2894 if self._prev.token_type == TokenType.UPDATE: 2895 self._match(TokenType.SET) 2896 expressions = self._parse_csv(self._parse_equality) 2897 else: 2898 expressions = None 2899 2900 return self.expression( 2901 exp.OnConflict, 2902 duplicate=duplicate, 2903 expressions=expressions, 2904 action=action, 2905 conflict_keys=conflict_keys, 2906 constraint=constraint, 2907 where=self._parse_where(), 2908 ) 2909 2910 def _parse_returning(self) -> t.Optional[exp.Returning]: 2911 if not self._match(TokenType.RETURNING): 2912 return None 2913 return self.expression( 2914 exp.Returning, 2915 expressions=self._parse_csv(self._parse_expression), 2916 into=self._match(TokenType.INTO) and self._parse_table_part(), 2917 ) 2918 2919 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2920 if not self._match(TokenType.FORMAT): 2921 return None 2922 return self._parse_row_format() 2923 2924 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2925 index = self._index 2926 with_ = with_ or self._match_text_seq("WITH") 2927 2928 if not self._match(TokenType.SERDE_PROPERTIES): 2929 self._retreat(index) 2930 return None 2931 return self.expression( 2932 exp.SerdeProperties, 2933 **{ # type: ignore 2934 "expressions": self._parse_wrapped_properties(), 2935 "with": with_, 2936 }, 2937 ) 2938 2939 def _parse_row_format( 2940 self, match_row: bool = False 2941 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2942 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2943 return None 2944 2945 if self._match_text_seq("SERDE"): 2946 this = self._parse_string() 2947 2948 serde_properties = self._parse_serde_properties() 2949 2950 return self.expression( 2951 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2952 ) 2953 2954 self._match_text_seq("DELIMITED") 2955 2956 kwargs = {} 2957 2958 if 
self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2959 kwargs["fields"] = self._parse_string() 2960 if self._match_text_seq("ESCAPED", "BY"): 2961 kwargs["escaped"] = self._parse_string() 2962 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2963 kwargs["collection_items"] = self._parse_string() 2964 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2965 kwargs["map_keys"] = self._parse_string() 2966 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2967 kwargs["lines"] = self._parse_string() 2968 if self._match_text_seq("NULL", "DEFINED", "AS"): 2969 kwargs["null"] = self._parse_string() 2970 2971 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2972 2973 def _parse_load(self) -> exp.LoadData | exp.Command: 2974 if self._match_text_seq("DATA"): 2975 local = self._match_text_seq("LOCAL") 2976 self._match_text_seq("INPATH") 2977 inpath = self._parse_string() 2978 overwrite = self._match(TokenType.OVERWRITE) 2979 self._match_pair(TokenType.INTO, TokenType.TABLE) 2980 2981 return self.expression( 2982 exp.LoadData, 2983 this=self._parse_table(schema=True), 2984 local=local, 2985 overwrite=overwrite, 2986 inpath=inpath, 2987 partition=self._parse_partition(), 2988 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2989 serde=self._match_text_seq("SERDE") and self._parse_string(), 2990 ) 2991 return self._parse_as_command(self._prev) 2992 2993 def _parse_delete(self) -> exp.Delete: 2994 # This handles MySQL's "Multiple-Table Syntax" 2995 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2996 tables = None 2997 if not self._match(TokenType.FROM, advance=False): 2998 tables = self._parse_csv(self._parse_table) or None 2999 3000 returning = self._parse_returning() 3001 3002 return self.expression( 3003 exp.Delete, 3004 tables=tables, 3005 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3006 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3007 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3008 where=self._parse_where(), 3009 returning=returning or self._parse_returning(), 3010 limit=self._parse_limit(), 3011 ) 3012 3013 def _parse_update(self) -> exp.Update: 3014 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 3015 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 3016 returning = self._parse_returning() 3017 return self.expression( 3018 exp.Update, 3019 **{ # type: ignore 3020 "this": this, 3021 "expressions": expressions, 3022 "from": self._parse_from(joins=True), 3023 "where": self._parse_where(), 3024 "returning": returning or self._parse_returning(), 3025 "order": self._parse_order(), 3026 "limit": self._parse_limit(), 3027 }, 3028 ) 3029 3030 def _parse_use(self) -> exp.Use: 3031 return self.expression( 3032 exp.Use, 3033 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3034 this=self._parse_table(schema=False), 3035 ) 3036 3037 def _parse_uncache(self) -> exp.Uncache: 3038 if not self._match(TokenType.TABLE): 3039 self.raise_error("Expecting TABLE after UNCACHE") 3040 3041 return self.expression( 3042 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3043 ) 3044 3045 def _parse_cache(self) -> exp.Cache: 3046 lazy = self._match_text_seq("LAZY") 3047 self._match(TokenType.TABLE) 3048 table = self._parse_table(schema=True) 3049 3050 options = [] 3051 if self._match_text_seq("OPTIONS"): 3052 self._match_l_paren() 3053 k = 
self._parse_string() 3054 self._match(TokenType.EQ) 3055 v = self._parse_string() 3056 options = [k, v] 3057 self._match_r_paren() 3058 3059 self._match(TokenType.ALIAS) 3060 return self.expression( 3061 exp.Cache, 3062 this=table, 3063 lazy=lazy, 3064 options=options, 3065 expression=self._parse_select(nested=True), 3066 ) 3067 3068 def _parse_partition(self) -> t.Optional[exp.Partition]: 3069 if not self._match_texts(self.PARTITION_KEYWORDS): 3070 return None 3071 3072 return self.expression( 3073 exp.Partition, 3074 subpartition=self._prev.text.upper() == "SUBPARTITION", 3075 expressions=self._parse_wrapped_csv(self._parse_assignment), 3076 ) 3077 3078 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3079 def _parse_value_expression() -> t.Optional[exp.Expression]: 3080 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3081 return exp.var(self._prev.text.upper()) 3082 return self._parse_expression() 3083 3084 if self._match(TokenType.L_PAREN): 3085 expressions = self._parse_csv(_parse_value_expression) 3086 self._match_r_paren() 3087 return self.expression(exp.Tuple, expressions=expressions) 3088 3089 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 3090 expression = self._parse_expression() 3091 if expression: 3092 return self.expression(exp.Tuple, expressions=[expression]) 3093 return None 3094 3095 def _parse_projections(self) -> t.List[exp.Expression]: 3096 return self._parse_expressions() 3097 3098 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3099 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3100 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3101 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3102 ) 3103 elif self._match(TokenType.FROM): 3104 from_ = self._parse_from(skip_from_token=True) 3105 # Support parentheses for duckdb FROM-first syntax 3106 select = self._parse_select() 3107 if select: 3108 select.set("from", from_) 3109 this = select 3110 else: 3111 this = exp.select("*").from_(t.cast(exp.From, from_)) 3112 else: 3113 this = ( 3114 self._parse_table() 3115 if table 3116 else self._parse_select(nested=True, parse_set_operation=False) 3117 ) 3118 3119 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3120 # in case a modifier (e.g. 
join) is following 3121 if table and isinstance(this, exp.Values) and this.alias: 3122 alias = this.args["alias"].pop() 3123 this = exp.Table(this=this, alias=alias) 3124 3125 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3126 3127 return this 3128 3129 def _parse_select( 3130 self, 3131 nested: bool = False, 3132 table: bool = False, 3133 parse_subquery_alias: bool = True, 3134 parse_set_operation: bool = True, 3135 ) -> t.Optional[exp.Expression]: 3136 cte = self._parse_with() 3137 3138 if cte: 3139 this = self._parse_statement() 3140 3141 if not this: 3142 self.raise_error("Failed to parse any statement following CTE") 3143 return cte 3144 3145 if "with" in this.arg_types: 3146 this.set("with", cte) 3147 else: 3148 self.raise_error(f"{this.key} does not support CTE") 3149 this = cte 3150 3151 return this 3152 3153 # duckdb supports leading with FROM x 3154 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 3155 3156 if self._match(TokenType.SELECT): 3157 comments = self._prev_comments 3158 3159 hint = self._parse_hint() 3160 3161 if self._next and not self._next.token_type == TokenType.DOT: 3162 all_ = self._match(TokenType.ALL) 3163 distinct = self._match_set(self.DISTINCT_TOKENS) 3164 else: 3165 all_, distinct = None, None 3166 3167 kind = ( 3168 self._match(TokenType.ALIAS) 3169 and self._match_texts(("STRUCT", "VALUE")) 3170 and self._prev.text.upper() 3171 ) 3172 3173 if distinct: 3174 distinct = self.expression( 3175 exp.Distinct, 3176 on=self._parse_value(values=False) if self._match(TokenType.ON) else None, 3177 ) 3178 3179 if all_ and distinct: 3180 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3181 3182 operation_modifiers = [] 3183 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3184 operation_modifiers.append(exp.var(self._prev.text.upper())) 3185 3186 limit = self._parse_limit(top=True) 3187 projections = self._parse_projections() 3188 3189 this = self.expression( 3190 exp.Select, 3191 kind=kind, 3192 hint=hint, 3193 distinct=distinct, 3194 expressions=projections, 3195 limit=limit, 3196 operation_modifiers=operation_modifiers or None, 3197 ) 3198 this.comments = comments 3199 3200 into = self._parse_into() 3201 if into: 3202 this.set("into", into) 3203 3204 if not from_: 3205 from_ = self._parse_from() 3206 3207 if from_: 3208 this.set("from", from_) 3209 3210 this = self._parse_query_modifiers(this) 3211 elif (table or nested) and self._match(TokenType.L_PAREN): 3212 this = self._parse_wrapped_select(table=table) 3213 3214 # We return early here so that the UNION isn't attached to the subquery by the 3215 # following call to _parse_set_operations, but instead becomes the parent node 3216 self._match_r_paren() 3217 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3218 elif self._match(TokenType.VALUES, advance=False): 3219 this = self._parse_derived_table_values() 3220 elif from_: 3221 this = exp.select("*").from_(from_.this, copy=False) 3222 elif self._match(TokenType.SUMMARIZE): 3223 table = self._match(TokenType.TABLE) 3224 this = self._parse_select() or self._parse_string() or self._parse_table() 3225 return self.expression(exp.Summarize, this=this, table=table) 3226 elif self._match(TokenType.DESCRIBE): 3227 this = self._parse_describe() 3228 elif self._match_text_seq("STREAM"): 3229 this = self._parse_function() 3230 if this: 3231 this = self.expression(exp.Stream, this=this) 3232 else: 3233 self._retreat(self._index - 1) 3234 else: 3235 this = None 
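# Illustrative note (a sketch, not from the original source): at this point `this` is one of
# an exp.Select, a wrapped subquery, a VALUES clause, a Summarize/Describe/Stream node, a bare
# FROM rewritten to SELECT *, or None. For instance, duckdb's FROM-first syntax is handled by
# the `elif from_` branch above:
#     parse_one("FROM tbl", read="duckdb").sql()  ->  'SELECT * FROM tbl'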
3236 3237 return self._parse_set_operations(this) if parse_set_operation else this 3238 3239 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3240 self._match_text_seq("SEARCH") 3241 3242 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3243 3244 if not kind: 3245 return None 3246 3247 self._match_text_seq("FIRST", "BY") 3248 3249 return self.expression( 3250 exp.RecursiveWithSearch, 3251 kind=kind, 3252 this=self._parse_id_var(), 3253 expression=self._match_text_seq("SET") and self._parse_id_var(), 3254 using=self._match_text_seq("USING") and self._parse_id_var(), 3255 ) 3256 3257 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3258 if not skip_with_token and not self._match(TokenType.WITH): 3259 return None 3260 3261 comments = self._prev_comments 3262 recursive = self._match(TokenType.RECURSIVE) 3263 3264 last_comments = None 3265 expressions = [] 3266 while True: 3267 cte = self._parse_cte() 3268 if isinstance(cte, exp.CTE): 3269 expressions.append(cte) 3270 if last_comments: 3271 cte.add_comments(last_comments) 3272 3273 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3274 break 3275 else: 3276 self._match(TokenType.WITH) 3277 3278 last_comments = self._prev_comments 3279 3280 return self.expression( 3281 exp.With, 3282 comments=comments, 3283 expressions=expressions, 3284 recursive=recursive, 3285 search=self._parse_recursive_with_search(), 3286 ) 3287 3288 def _parse_cte(self) -> t.Optional[exp.CTE]: 3289 index = self._index 3290 3291 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3292 if not alias or not alias.this: 3293 self.raise_error("Expected CTE to have alias") 3294 3295 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3296 self._retreat(index) 3297 return None 3298 3299 comments = self._prev_comments 3300 3301 if self._match_text_seq("NOT", "MATERIALIZED"): 3302 materialized = False 3303 elif self._match_text_seq("MATERIALIZED"): 3304 materialized = True 3305 else: 3306 materialized = None 3307 3308 cte = self.expression( 3309 exp.CTE, 3310 this=self._parse_wrapped(self._parse_statement), 3311 alias=alias, 3312 materialized=materialized, 3313 comments=comments, 3314 ) 3315 3316 if isinstance(cte.this, exp.Values): 3317 cte.set("this", exp.select("*").from_(exp.alias_(cte.this, "_values", table=True))) 3318 3319 return cte 3320 3321 def _parse_table_alias( 3322 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3323 ) -> t.Optional[exp.TableAlias]: 3324 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3325 # so this section tries to parse the clause version and if it fails, it treats the token 3326 # as an identifier (alias) 3327 if self._can_parse_limit_or_offset(): 3328 return None 3329 3330 any_token = self._match(TokenType.ALIAS) 3331 alias = ( 3332 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3333 or self._parse_string_as_identifier() 3334 ) 3335 3336 index = self._index 3337 if self._match(TokenType.L_PAREN): 3338 columns = self._parse_csv(self._parse_function_parameter) 3339 self._match_r_paren() if columns else self._retreat(index) 3340 else: 3341 columns = None 3342 3343 if not alias and not columns: 3344 return None 3345 3346 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3347 3348 # We bubble up comments from the Identifier to the TableAlias 3349 if isinstance(alias, exp.Identifier): 3350 
table_alias.add_comments(alias.pop_comments()) 3351 3352 return table_alias 3353 3354 def _parse_subquery( 3355 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3356 ) -> t.Optional[exp.Subquery]: 3357 if not this: 3358 return None 3359 3360 return self.expression( 3361 exp.Subquery, 3362 this=this, 3363 pivots=self._parse_pivots(), 3364 alias=self._parse_table_alias() if parse_alias else None, 3365 sample=self._parse_table_sample(), 3366 ) 3367 3368 def _implicit_unnests_to_explicit(self, this: E) -> E: 3369 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3370 3371 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3372 for i, join in enumerate(this.args.get("joins") or []): 3373 table = join.this 3374 normalized_table = table.copy() 3375 normalized_table.meta["maybe_column"] = True 3376 normalized_table = _norm(normalized_table, dialect=self.dialect) 3377 3378 if isinstance(table, exp.Table) and not join.args.get("on"): 3379 if normalized_table.parts[0].name in refs: 3380 table_as_column = table.to_column() 3381 unnest = exp.Unnest(expressions=[table_as_column]) 3382 3383 # Table.to_column creates a parent Alias node that we want to convert to 3384 # a TableAlias and attach to the Unnest, so it matches the parser's output 3385 if isinstance(table.args.get("alias"), exp.TableAlias): 3386 table_as_column.replace(table_as_column.this) 3387 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3388 3389 table.replace(unnest) 3390 3391 refs.add(normalized_table.alias_or_name) 3392 3393 return this 3394 3395 def _parse_query_modifiers( 3396 self, this: t.Optional[exp.Expression] 3397 ) -> t.Optional[exp.Expression]: 3398 if isinstance(this, self.MODIFIABLES): 3399 for join in self._parse_joins(): 3400 this.append("joins", join) 3401 for lateral in iter(self._parse_lateral, None): 3402 this.append("laterals", lateral) 3403 3404 while True: 3405 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3406 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3407 key, expression = parser(self) 3408 3409 if expression: 3410 this.set(key, expression) 3411 if key == "limit": 3412 offset = expression.args.pop("offset", None) 3413 3414 if offset: 3415 offset = exp.Offset(expression=offset) 3416 this.set("offset", offset) 3417 3418 limit_by_expressions = expression.expressions 3419 expression.set("expressions", None) 3420 offset.set("expressions", limit_by_expressions) 3421 continue 3422 break 3423 3424 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3425 this = self._implicit_unnests_to_explicit(this) 3426 3427 return this 3428 3429 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3430 start = self._curr 3431 while self._curr: 3432 self._advance() 3433 3434 end = self._tokens[self._index - 1] 3435 return exp.Hint(expressions=[self._find_sql(start, end)]) 3436 3437 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3438 return self._parse_function_call() 3439 3440 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3441 start_index = self._index 3442 should_fallback_to_string = False 3443 3444 hints = [] 3445 try: 3446 for hint in iter( 3447 lambda: self._parse_csv( 3448 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3449 ), 3450 [], 3451 ): 3452 hints.extend(hint) 3453 except ParseError: 3454 should_fallback_to_string = True 3455 3456 if should_fallback_to_string or self._curr: 3457 self._retreat(start_index) 
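            # Illustrative note (not part of the original source): a hint body that
            # cannot be parsed structurally, e.g. /*+ SOME_OPAQUE_HINT(a b c) */, is
            # kept verbatim as a single string expression so that it round-trips.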
3458 return self._parse_hint_fallback_to_string() 3459 3460 return self.expression(exp.Hint, expressions=hints) 3461 3462 def _parse_hint(self) -> t.Optional[exp.Hint]: 3463 if self._match(TokenType.HINT) and self._prev_comments: 3464 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3465 3466 return None 3467 3468 def _parse_into(self) -> t.Optional[exp.Into]: 3469 if not self._match(TokenType.INTO): 3470 return None 3471 3472 temp = self._match(TokenType.TEMPORARY) 3473 unlogged = self._match_text_seq("UNLOGGED") 3474 self._match(TokenType.TABLE) 3475 3476 return self.expression( 3477 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3478 ) 3479 3480 def _parse_from( 3481 self, joins: bool = False, skip_from_token: bool = False 3482 ) -> t.Optional[exp.From]: 3483 if not skip_from_token and not self._match(TokenType.FROM): 3484 return None 3485 3486 return self.expression( 3487 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3488 ) 3489 3490 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3491 return self.expression( 3492 exp.MatchRecognizeMeasure, 3493 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3494 this=self._parse_expression(), 3495 ) 3496 3497 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3498 if not self._match(TokenType.MATCH_RECOGNIZE): 3499 return None 3500 3501 self._match_l_paren() 3502 3503 partition = self._parse_partition_by() 3504 order = self._parse_order() 3505 3506 measures = ( 3507 self._parse_csv(self._parse_match_recognize_measure) 3508 if self._match_text_seq("MEASURES") 3509 else None 3510 ) 3511 3512 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3513 rows = exp.var("ONE ROW PER MATCH") 3514 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3515 text = "ALL ROWS PER MATCH" 3516 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3517 text += " SHOW EMPTY MATCHES" 3518 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3519 text += " OMIT EMPTY MATCHES" 3520 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3521 text += " WITH UNMATCHED ROWS" 3522 rows = exp.var(text) 3523 else: 3524 rows = None 3525 3526 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3527 text = "AFTER MATCH SKIP" 3528 if self._match_text_seq("PAST", "LAST", "ROW"): 3529 text += " PAST LAST ROW" 3530 elif self._match_text_seq("TO", "NEXT", "ROW"): 3531 text += " TO NEXT ROW" 3532 elif self._match_text_seq("TO", "FIRST"): 3533 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3534 elif self._match_text_seq("TO", "LAST"): 3535 text += f" TO LAST {self._advance_any().text}" # type: ignore 3536 after = exp.var(text) 3537 else: 3538 after = None 3539 3540 if self._match_text_seq("PATTERN"): 3541 self._match_l_paren() 3542 3543 if not self._curr: 3544 self.raise_error("Expecting )", self._curr) 3545 3546 paren = 1 3547 start = self._curr 3548 3549 while self._curr and paren > 0: 3550 if self._curr.token_type == TokenType.L_PAREN: 3551 paren += 1 3552 if self._curr.token_type == TokenType.R_PAREN: 3553 paren -= 1 3554 3555 end = self._prev 3556 self._advance() 3557 3558 if paren > 0: 3559 self.raise_error("Expecting )", self._curr) 3560 3561 pattern = exp.var(self._find_sql(start, end)) 3562 else: 3563 pattern = None 3564 3565 define = ( 3566 self._parse_csv(self._parse_name_as_expression) 3567 if self._match_text_seq("DEFINE") 3568 else None 3569 ) 3570 3571 self._match_r_paren() 3572 
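        # Illustrative input (not part of the original source) exercising the clauses
        # parsed above, in Snowflake/Oracle-style row pattern matching:
        #
        #   SELECT * FROM t MATCH_RECOGNIZE (
        #     PARTITION BY a ORDER BY b
        #     MEASURES FINAL COUNT(*) AS cnt
        #     ALL ROWS PER MATCH OMIT EMPTY MATCHES
        #     AFTER MATCH SKIP PAST LAST ROW
        #     PATTERN (x+ y)
        #     DEFINE x AS price < 10, y AS price >= 10
        #   ) AS mr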
3573 return self.expression( 3574 exp.MatchRecognize, 3575 partition_by=partition, 3576 order=order, 3577 measures=measures, 3578 rows=rows, 3579 after=after, 3580 pattern=pattern, 3581 define=define, 3582 alias=self._parse_table_alias(), 3583 ) 3584 3585 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3586 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3587 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3588 cross_apply = False 3589 3590 if cross_apply is not None: 3591 this = self._parse_select(table=True) 3592 view = None 3593 outer = None 3594 elif self._match(TokenType.LATERAL): 3595 this = self._parse_select(table=True) 3596 view = self._match(TokenType.VIEW) 3597 outer = self._match(TokenType.OUTER) 3598 else: 3599 return None 3600 3601 if not this: 3602 this = ( 3603 self._parse_unnest() 3604 or self._parse_function() 3605 or self._parse_id_var(any_token=False) 3606 ) 3607 3608 while self._match(TokenType.DOT): 3609 this = exp.Dot( 3610 this=this, 3611 expression=self._parse_function() or self._parse_id_var(any_token=False), 3612 ) 3613 3614 ordinality: t.Optional[bool] = None 3615 3616 if view: 3617 table = self._parse_id_var(any_token=False) 3618 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3619 table_alias: t.Optional[exp.TableAlias] = self.expression( 3620 exp.TableAlias, this=table, columns=columns 3621 ) 3622 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3623 # We move the alias from the lateral's child node to the lateral itself 3624 table_alias = this.args["alias"].pop() 3625 else: 3626 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3627 table_alias = self._parse_table_alias() 3628 3629 return self.expression( 3630 exp.Lateral, 3631 this=this, 3632 view=view, 3633 outer=outer, 3634 alias=table_alias, 3635 cross_apply=cross_apply, 3636 ordinality=ordinality, 3637 ) 3638 3639 def _parse_join_parts( 3640 self, 3641 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3642 return ( 3643 self._match_set(self.JOIN_METHODS) and self._prev, 3644 self._match_set(self.JOIN_SIDES) and self._prev, 3645 self._match_set(self.JOIN_KINDS) and self._prev, 3646 ) 3647 3648 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3649 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3650 this = self._parse_column() 3651 if isinstance(this, exp.Column): 3652 return this.this 3653 return this 3654 3655 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3656 3657 def _parse_join( 3658 self, skip_join_token: bool = False, parse_bracket: bool = False 3659 ) -> t.Optional[exp.Join]: 3660 if self._match(TokenType.COMMA): 3661 table = self._try_parse(self._parse_table) 3662 if table: 3663 return self.expression(exp.Join, this=table) 3664 return None 3665 3666 index = self._index 3667 method, side, kind = self._parse_join_parts() 3668 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3669 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3670 3671 if not skip_join_token and not join: 3672 self._retreat(index) 3673 kind = None 3674 method = None 3675 side = None 3676 3677 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3678 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3679 3680 if not skip_join_token and not join and not outer_apply and not cross_apply: 3681 return None 3682 3683 kwargs: 
t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3684 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3685 kwargs["expressions"] = self._parse_csv( 3686 lambda: self._parse_table(parse_bracket=parse_bracket) 3687 ) 3688 3689 if method: 3690 kwargs["method"] = method.text 3691 if side: 3692 kwargs["side"] = side.text 3693 if kind: 3694 kwargs["kind"] = kind.text 3695 if hint: 3696 kwargs["hint"] = hint 3697 3698 if self._match(TokenType.MATCH_CONDITION): 3699 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3700 3701 if self._match(TokenType.ON): 3702 kwargs["on"] = self._parse_assignment() 3703 elif self._match(TokenType.USING): 3704 kwargs["using"] = self._parse_using_identifiers() 3705 elif ( 3706 not (outer_apply or cross_apply) 3707 and not isinstance(kwargs["this"], exp.Unnest) 3708 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3709 ): 3710 index = self._index 3711 joins: t.Optional[list] = list(self._parse_joins()) 3712 3713 if joins and self._match(TokenType.ON): 3714 kwargs["on"] = self._parse_assignment() 3715 elif joins and self._match(TokenType.USING): 3716 kwargs["using"] = self._parse_using_identifiers() 3717 else: 3718 joins = None 3719 self._retreat(index) 3720 3721 kwargs["this"].set("joins", joins if joins else None) 3722 3723 comments = [c for token in (method, side, kind) if token for c in token.comments] 3724 return self.expression(exp.Join, comments=comments, **kwargs) 3725 3726 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3727 this = self._parse_assignment() 3728 3729 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3730 return this 3731 3732 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3733 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3734 3735 return this 3736 3737 def _parse_index_params(self) -> exp.IndexParameters: 3738 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3739 3740 if self._match(TokenType.L_PAREN, advance=False): 3741 columns = self._parse_wrapped_csv(self._parse_with_operator) 3742 else: 3743 columns = None 3744 3745 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3746 partition_by = self._parse_partition_by() 3747 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3748 tablespace = ( 3749 self._parse_var(any_token=True) 3750 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3751 else None 3752 ) 3753 where = self._parse_where() 3754 3755 on = self._parse_field() if self._match(TokenType.ON) else None 3756 3757 return self.expression( 3758 exp.IndexParameters, 3759 using=using, 3760 columns=columns, 3761 include=include, 3762 partition_by=partition_by, 3763 where=where, 3764 with_storage=with_storage, 3765 tablespace=tablespace, 3766 on=on, 3767 ) 3768 3769 def _parse_index( 3770 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3771 ) -> t.Optional[exp.Index]: 3772 if index or anonymous: 3773 unique = None 3774 primary = None 3775 amp = None 3776 3777 self._match(TokenType.ON) 3778 self._match(TokenType.TABLE) # hive 3779 table = self._parse_table_parts(schema=True) 3780 else: 3781 unique = self._match(TokenType.UNIQUE) 3782 primary = self._match_text_seq("PRIMARY") 3783 amp = self._match_text_seq("AMP") 3784 3785 if not self._match(TokenType.INDEX): 3786 return None 3787 3788 index = self._parse_id_var() 3789 table = None 
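        # Illustrative inputs (not part of the original source) for the two branches:
        #   CREATE [UNIQUE] INDEX idx ON tbl (col)  -> else-branch: flags first, then INDEX name
        #   ... INDEX ... ON TABLE tbl ...          -> hive-style branch, taken when an index
        #                                              was already parsed or anonymous=True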
3790 3791 params = self._parse_index_params() 3792 3793 return self.expression( 3794 exp.Index, 3795 this=index, 3796 table=table, 3797 unique=unique, 3798 primary=primary, 3799 amp=amp, 3800 params=params, 3801 ) 3802 3803 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3804 hints: t.List[exp.Expression] = [] 3805 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3806 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3807 hints.append( 3808 self.expression( 3809 exp.WithTableHint, 3810 expressions=self._parse_csv( 3811 lambda: self._parse_function() or self._parse_var(any_token=True) 3812 ), 3813 ) 3814 ) 3815 self._match_r_paren() 3816 else: 3817 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3818 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3819 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3820 3821 self._match_set((TokenType.INDEX, TokenType.KEY)) 3822 if self._match(TokenType.FOR): 3823 hint.set("target", self._advance_any() and self._prev.text.upper()) 3824 3825 hint.set("expressions", self._parse_wrapped_id_vars()) 3826 hints.append(hint) 3827 3828 return hints or None 3829 3830 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3831 return ( 3832 (not schema and self._parse_function(optional_parens=False)) 3833 or self._parse_id_var(any_token=False) 3834 or self._parse_string_as_identifier() 3835 or self._parse_placeholder() 3836 ) 3837 3838 def _parse_table_parts( 3839 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3840 ) -> exp.Table: 3841 catalog = None 3842 db = None 3843 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3844 3845 while self._match(TokenType.DOT): 3846 if catalog: 3847 # This allows nesting the table in arbitrarily many dot expressions if needed 3848 table = self.expression( 3849 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3850 ) 3851 else: 3852 catalog = db 3853 db = table 3854 # "" used for tsql FROM a..b case 3855 table = self._parse_table_part(schema=schema) or "" 3856 3857 if ( 3858 wildcard 3859 and self._is_connected() 3860 and (isinstance(table, exp.Identifier) or not table) 3861 and self._match(TokenType.STAR) 3862 ): 3863 if isinstance(table, exp.Identifier): 3864 table.args["this"] += "*" 3865 else: 3866 table = exp.Identifier(this="*") 3867 3868 # We bubble up comments from the Identifier to the Table 3869 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3870 3871 if is_db_reference: 3872 catalog = db 3873 db = table 3874 table = None 3875 3876 if not table and not is_db_reference: 3877 self.raise_error(f"Expected table name but got {self._curr}") 3878 if not db and is_db_reference: 3879 self.raise_error(f"Expected database name but got {self._curr}") 3880 3881 table = self.expression( 3882 exp.Table, 3883 comments=comments, 3884 this=table, 3885 db=db, 3886 catalog=catalog, 3887 ) 3888 3889 changes = self._parse_changes() 3890 if changes: 3891 table.set("changes", changes) 3892 3893 at_before = self._parse_historical_data() 3894 if at_before: 3895 table.set("when", at_before) 3896 3897 pivots = self._parse_pivots() 3898 if pivots: 3899 table.set("pivots", pivots) 3900 3901 return table 3902 3903 def _parse_table( 3904 self, 3905 schema: bool = False, 3906 joins: bool = False, 3907 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3908 parse_bracket: bool = False, 3909 is_db_reference: 
bool = False, 3910 parse_partition: bool = False, 3911 ) -> t.Optional[exp.Expression]: 3912 lateral = self._parse_lateral() 3913 if lateral: 3914 return lateral 3915 3916 unnest = self._parse_unnest() 3917 if unnest: 3918 return unnest 3919 3920 values = self._parse_derived_table_values() 3921 if values: 3922 return values 3923 3924 subquery = self._parse_select(table=True) 3925 if subquery: 3926 if not subquery.args.get("pivots"): 3927 subquery.set("pivots", self._parse_pivots()) 3928 return subquery 3929 3930 bracket = parse_bracket and self._parse_bracket(None) 3931 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3932 3933 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3934 self._parse_table 3935 ) 3936 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3937 3938 only = self._match(TokenType.ONLY) 3939 3940 this = t.cast( 3941 exp.Expression, 3942 bracket 3943 or rows_from 3944 or self._parse_bracket( 3945 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3946 ), 3947 ) 3948 3949 if only: 3950 this.set("only", only) 3951 3952 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3953 self._match_text_seq("*") 3954 3955 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3956 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3957 this.set("partition", self._parse_partition()) 3958 3959 if schema: 3960 return self._parse_schema(this=this) 3961 3962 version = self._parse_version() 3963 3964 if version: 3965 this.set("version", version) 3966 3967 if self.dialect.ALIAS_POST_TABLESAMPLE: 3968 this.set("sample", self._parse_table_sample()) 3969 3970 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3971 if alias: 3972 this.set("alias", alias) 3973 3974 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3975 return self.expression( 3976 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3977 ) 3978 3979 this.set("hints", self._parse_table_hints()) 3980 3981 if not this.args.get("pivots"): 3982 this.set("pivots", self._parse_pivots()) 3983 3984 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3985 this.set("sample", self._parse_table_sample()) 3986 3987 if joins: 3988 for join in self._parse_joins(): 3989 this.append("joins", join) 3990 3991 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3992 this.set("ordinality", True) 3993 this.set("alias", self._parse_table_alias()) 3994 3995 return this 3996 3997 def _parse_version(self) -> t.Optional[exp.Version]: 3998 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3999 this = "TIMESTAMP" 4000 elif self._match(TokenType.VERSION_SNAPSHOT): 4001 this = "VERSION" 4002 else: 4003 return None 4004 4005 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4006 kind = self._prev.text.upper() 4007 start = self._parse_bitwise() 4008 self._match_texts(("TO", "AND")) 4009 end = self._parse_bitwise() 4010 expression: t.Optional[exp.Expression] = self.expression( 4011 exp.Tuple, expressions=[start, end] 4012 ) 4013 elif self._match_text_seq("CONTAINED", "IN"): 4014 kind = "CONTAINED IN" 4015 expression = self.expression( 4016 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4017 ) 4018 elif self._match(TokenType.ALL): 4019 kind = "ALL" 4020 expression = None 4021 else: 4022 self._match_text_seq("AS", "OF") 4023 kind = "AS OF" 4024 expression = self._parse_type() 4025 4026 return 
self.expression(exp.Version, this=this, expression=expression, kind=kind) 4027 4028 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 4029 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4030 index = self._index 4031 historical_data = None 4032 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4033 this = self._prev.text.upper() 4034 kind = ( 4035 self._match(TokenType.L_PAREN) 4036 and self._match_texts(self.HISTORICAL_DATA_KIND) 4037 and self._prev.text.upper() 4038 ) 4039 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4040 4041 if expression: 4042 self._match_r_paren() 4043 historical_data = self.expression( 4044 exp.HistoricalData, this=this, kind=kind, expression=expression 4045 ) 4046 else: 4047 self._retreat(index) 4048 4049 return historical_data 4050 4051 def _parse_changes(self) -> t.Optional[exp.Changes]: 4052 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4053 return None 4054 4055 information = self._parse_var(any_token=True) 4056 self._match_r_paren() 4057 4058 return self.expression( 4059 exp.Changes, 4060 information=information, 4061 at_before=self._parse_historical_data(), 4062 end=self._parse_historical_data(), 4063 ) 4064 4065 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4066 if not self._match(TokenType.UNNEST): 4067 return None 4068 4069 expressions = self._parse_wrapped_csv(self._parse_equality) 4070 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4071 4072 alias = self._parse_table_alias() if with_alias else None 4073 4074 if alias: 4075 if self.dialect.UNNEST_COLUMN_ONLY: 4076 if alias.args.get("columns"): 4077 self.raise_error("Unexpected extra column alias in unnest.") 4078 4079 alias.set("columns", [alias.this]) 4080 alias.set("this", None) 4081 4082 columns = alias.args.get("columns") or [] 4083 if offset and len(expressions) < len(columns): 4084 offset = columns.pop() 4085 4086 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4087 self._match(TokenType.ALIAS) 4088 offset = self._parse_id_var( 4089 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4090 ) or exp.to_identifier("offset") 4091 4092 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4093 4094 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4095 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4096 if not is_derived and not ( 4097 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4098 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4099 ): 4100 return None 4101 4102 expressions = self._parse_csv(self._parse_value) 4103 alias = self._parse_table_alias() 4104 4105 if is_derived: 4106 self._match_r_paren() 4107 4108 return self.expression( 4109 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4110 ) 4111 4112 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4113 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4114 as_modifier and self._match_text_seq("USING", "SAMPLE") 4115 ): 4116 return None 4117 4118 bucket_numerator = None 4119 bucket_denominator = None 4120 bucket_field = None 4121 percent = None 4122 size = None 4123 seed = None 4124 4125 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4126 matched_l_paren = self._match(TokenType.L_PAREN) 4127 4128 if self.TABLESAMPLE_CSV: 4129 num = None 4130 expressions = self._parse_csv(self._parse_primary) 4131 else: 
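            # Illustrative forms (not part of the original source) that reach this
            # non-CSV branch:
            #   t TABLESAMPLE (10 PERCENT)   -> percent
            #   t TABLESAMPLE (5 ROWS)       -> size
            #   t TABLESAMPLE BERNOULLI (1)  -> method captured before the paren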
4132 expressions = None 4133 num = ( 4134 self._parse_factor() 4135 if self._match(TokenType.NUMBER, advance=False) 4136 else self._parse_primary() or self._parse_placeholder() 4137 ) 4138 4139 if self._match_text_seq("BUCKET"): 4140 bucket_numerator = self._parse_number() 4141 self._match_text_seq("OUT", "OF") 4142 bucket_denominator = self._parse_number() 4143 self._match(TokenType.ON) 4144 bucket_field = self._parse_field() 4145 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4146 percent = num 4147 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4148 size = num 4149 else: 4150 percent = num 4151 4152 if matched_l_paren: 4153 self._match_r_paren() 4154 4155 if self._match(TokenType.L_PAREN): 4156 method = self._parse_var(upper=True) 4157 seed = self._match(TokenType.COMMA) and self._parse_number() 4158 self._match_r_paren() 4159 elif self._match_texts(("SEED", "REPEATABLE")): 4160 seed = self._parse_wrapped(self._parse_number) 4161 4162 if not method and self.DEFAULT_SAMPLING_METHOD: 4163 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4164 4165 return self.expression( 4166 exp.TableSample, 4167 expressions=expressions, 4168 method=method, 4169 bucket_numerator=bucket_numerator, 4170 bucket_denominator=bucket_denominator, 4171 bucket_field=bucket_field, 4172 percent=percent, 4173 size=size, 4174 seed=seed, 4175 ) 4176 4177 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4178 return list(iter(self._parse_pivot, None)) or None 4179 4180 def _parse_joins(self) -> t.Iterator[exp.Join]: 4181 return iter(self._parse_join, None) 4182 4183 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4184 if not self._match(TokenType.INTO): 4185 return None 4186 4187 return self.expression( 4188 exp.UnpivotColumns, 4189 this=self._match_text_seq("NAME") and self._parse_column(), 4190 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4191 ) 4192 4193 # https://duckdb.org/docs/sql/statements/pivot 4194 def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4195 def _parse_on() -> t.Optional[exp.Expression]: 4196 this = self._parse_bitwise() 4197 4198 if self._match(TokenType.IN): 4199 # PIVOT ... ON col IN (row_val1, row_val2) 4200 return self._parse_in(this) 4201 if self._match(TokenType.ALIAS, advance=False): 4202 # UNPIVOT ...
ON (col1, col2, col3) AS row_val 4203 return self._parse_alias(this) 4204 4205 return this 4206 4207 this = self._parse_table() 4208 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4209 into = self._parse_unpivot_columns() 4210 using = self._match(TokenType.USING) and self._parse_csv( 4211 lambda: self._parse_alias(self._parse_function()) 4212 ) 4213 group = self._parse_group() 4214 4215 return self.expression( 4216 exp.Pivot, 4217 this=this, 4218 expressions=expressions, 4219 using=using, 4220 group=group, 4221 unpivot=is_unpivot, 4222 into=into, 4223 ) 4224 4225 def _parse_pivot_in(self) -> exp.In: 4226 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4227 this = self._parse_select_or_expression() 4228 4229 self._match(TokenType.ALIAS) 4230 alias = self._parse_bitwise() 4231 if alias: 4232 if isinstance(alias, exp.Column) and not alias.db: 4233 alias = alias.this 4234 return self.expression(exp.PivotAlias, this=this, alias=alias) 4235 4236 return this 4237 4238 value = self._parse_column() 4239 4240 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4241 self.raise_error("Expecting IN (") 4242 4243 if self._match(TokenType.ANY): 4244 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4245 else: 4246 exprs = self._parse_csv(_parse_aliased_expression) 4247 4248 self._match_r_paren() 4249 return self.expression(exp.In, this=value, expressions=exprs) 4250 4251 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4252 index = self._index 4253 include_nulls = None 4254 4255 if self._match(TokenType.PIVOT): 4256 unpivot = False 4257 elif self._match(TokenType.UNPIVOT): 4258 unpivot = True 4259 4260 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4261 if self._match_text_seq("INCLUDE", "NULLS"): 4262 include_nulls = True 4263 elif self._match_text_seq("EXCLUDE", "NULLS"): 4264 include_nulls = False 4265 else: 4266 return None 4267 4268 expressions = [] 4269 4270 if not self._match(TokenType.L_PAREN): 4271 self._retreat(index) 4272 return None 4273 4274 if unpivot: 4275 expressions = self._parse_csv(self._parse_column) 4276 else: 4277 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 4278 4279 if not expressions: 4280 self.raise_error("Failed to parse PIVOT's aggregation list") 4281 4282 if not self._match(TokenType.FOR): 4283 self.raise_error("Expecting FOR") 4284 4285 fields = [] 4286 while True: 4287 field = self._try_parse(self._parse_pivot_in) 4288 if not field: 4289 break 4290 fields.append(field) 4291 4292 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4293 self._parse_bitwise 4294 ) 4295 4296 group = self._parse_group() 4297 4298 self._match_r_paren() 4299 4300 pivot = self.expression( 4301 exp.Pivot, 4302 expressions=expressions, 4303 fields=fields, 4304 unpivot=unpivot, 4305 include_nulls=include_nulls, 4306 default_on_null=default_on_null, 4307 group=group, 4308 ) 4309 4310 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4311 pivot.set("alias", self._parse_table_alias()) 4312 4313 if not unpivot: 4314 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4315 4316 columns: t.List[exp.Expression] = [] 4317 all_fields = [] 4318 for pivot_field in pivot.fields: 4319 pivot_field_expressions = pivot_field.expressions 4320 4321 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 
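                # For example (illustrative, not part of the original source), Snowflake
                # accepts PIVOT(SUM(v) FOR k IN (ANY ORDER BY k)), where the output
                # columns are only known at execution time.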
4322 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4323 continue 4324 4325 all_fields.append( 4326 [ 4327 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4328 for fld in pivot_field_expressions 4329 ] 4330 ) 4331 4332 if all_fields: 4333 if names: 4334 all_fields.append(names) 4335 4336 # Generate all possible combinations of the pivot columns 4337 # e.g. PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 4338 # generates the product between [[2000, 2010], ['NL', 'US'], ['total']] 4339 for fld_parts_tuple in itertools.product(*all_fields): 4340 fld_parts = list(fld_parts_tuple) 4341 4342 if names and self.PREFIXED_PIVOT_COLUMNS: 4343 # Move the "name" to the front of the list 4344 fld_parts.insert(0, fld_parts.pop(-1)) 4345 4346 columns.append(exp.to_identifier("_".join(fld_parts))) 4347 4348 pivot.set("columns", columns) 4349 4350 return pivot 4351 4352 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4353 return [agg.alias for agg in aggregations if agg.alias] 4354 4355 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4356 if not skip_where_token and not self._match(TokenType.PREWHERE): 4357 return None 4358 4359 return self.expression( 4360 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4361 ) 4362 4363 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4364 if not skip_where_token and not self._match(TokenType.WHERE): 4365 return None 4366 4367 return self.expression( 4368 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4369 ) 4370 4371 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4372 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4373 return None 4374 4375 elements: t.Dict[str, t.Any] = defaultdict(list) 4376 4377 if self._match(TokenType.ALL): 4378 elements["all"] = True 4379 elif self._match(TokenType.DISTINCT): 4380 elements["all"] = False 4381 4382 while True: 4383 index = self._index 4384 4385 elements["expressions"].extend( 4386 self._parse_csv( 4387 lambda: None 4388 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4389 else self._parse_assignment() 4390 ) 4391 ) 4392 4393 before_with_index = self._index 4394 with_prefix = self._match(TokenType.WITH) 4395 4396 if self._match(TokenType.ROLLUP): 4397 elements["rollup"].append( 4398 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4399 ) 4400 elif self._match(TokenType.CUBE): 4401 elements["cube"].append( 4402 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4403 ) 4404 elif self._match(TokenType.GROUPING_SETS): 4405 elements["grouping_sets"].append( 4406 self.expression( 4407 exp.GroupingSets, 4408 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4409 ) 4410 ) 4411 elif self._match_text_seq("TOTALS"): 4412 elements["totals"] = True # type: ignore 4413 4414 if before_with_index <= self._index <= before_with_index + 1: 4415 self._retreat(before_with_index) 4416 break 4417 4418 if index == self._index: 4419 break 4420 4421 return self.expression(exp.Group, **elements) # type: ignore 4422 4423 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4424 return self.expression( 4425 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4426 ) 4427 4428 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4429 if
self._match(TokenType.L_PAREN): 4430 grouping_set = self._parse_csv(self._parse_column) 4431 self._match_r_paren() 4432 return self.expression(exp.Tuple, expressions=grouping_set) 4433 4434 return self._parse_column() 4435 4436 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4437 if not skip_having_token and not self._match(TokenType.HAVING): 4438 return None 4439 return self.expression(exp.Having, this=self._parse_assignment()) 4440 4441 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4442 if not self._match(TokenType.QUALIFY): 4443 return None 4444 return self.expression(exp.Qualify, this=self._parse_assignment()) 4445 4446 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4447 if skip_start_token: 4448 start = None 4449 elif self._match(TokenType.START_WITH): 4450 start = self._parse_assignment() 4451 else: 4452 return None 4453 4454 self._match(TokenType.CONNECT_BY) 4455 nocycle = self._match_text_seq("NOCYCLE") 4456 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4457 exp.Prior, this=self._parse_bitwise() 4458 ) 4459 connect = self._parse_assignment() 4460 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4461 4462 if not start and self._match(TokenType.START_WITH): 4463 start = self._parse_assignment() 4464 4465 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4466 4467 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4468 this = self._parse_id_var(any_token=True) 4469 if self._match(TokenType.ALIAS): 4470 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4471 return this 4472 4473 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4474 if self._match_text_seq("INTERPOLATE"): 4475 return self._parse_wrapped_csv(self._parse_name_as_expression) 4476 return None 4477 4478 def _parse_order( 4479 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4480 ) -> t.Optional[exp.Expression]: 4481 siblings = None 4482 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4483 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4484 return this 4485 4486 siblings = True 4487 4488 return self.expression( 4489 exp.Order, 4490 this=this, 4491 expressions=self._parse_csv(self._parse_ordered), 4492 siblings=siblings, 4493 ) 4494 4495 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4496 if not self._match(token): 4497 return None 4498 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4499 4500 def _parse_ordered( 4501 self, parse_method: t.Optional[t.Callable] = None 4502 ) -> t.Optional[exp.Ordered]: 4503 this = parse_method() if parse_method else self._parse_assignment() 4504 if not this: 4505 return None 4506 4507 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4508 this = exp.var("ALL") 4509 4510 asc = self._match(TokenType.ASC) 4511 desc = self._match(TokenType.DESC) or (asc and False) 4512 4513 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4514 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4515 4516 nulls_first = is_nulls_first or False 4517 explicitly_null_ordered = is_nulls_first or is_nulls_last 4518 4519 if ( 4520 not explicitly_null_ordered 4521 and ( 4522 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4523 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4524 ) 4525 and self.dialect.NULL_ORDERING != "nulls_are_last" 4526 ): 4527 
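            # Illustrative note (not part of the original source): e.g. under
            # "nulls_are_small" semantics an ascending sort implies NULLS FIRST even
            # though the query didn't spell it out, so it is recorded explicitly here.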
nulls_first = True 4528 4529 if self._match_text_seq("WITH", "FILL"): 4530 with_fill = self.expression( 4531 exp.WithFill, 4532 **{ # type: ignore 4533 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4534 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4535 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4536 "interpolate": self._parse_interpolate(), 4537 }, 4538 ) 4539 else: 4540 with_fill = None 4541 4542 return self.expression( 4543 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4544 ) 4545 4546 def _parse_limit_options(self) -> exp.LimitOptions: 4547 percent = self._match(TokenType.PERCENT) 4548 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4549 self._match_text_seq("ONLY") 4550 with_ties = self._match_text_seq("WITH", "TIES") 4551 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4552 4553 def _parse_limit( 4554 self, 4555 this: t.Optional[exp.Expression] = None, 4556 top: bool = False, 4557 skip_limit_token: bool = False, 4558 ) -> t.Optional[exp.Expression]: 4559 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4560 comments = self._prev_comments 4561 if top: 4562 limit_paren = self._match(TokenType.L_PAREN) 4563 expression = self._parse_term() if limit_paren else self._parse_number() 4564 4565 if limit_paren: 4566 self._match_r_paren() 4567 4568 limit_options = self._parse_limit_options() 4569 else: 4570 limit_options = None 4571 expression = self._parse_term() 4572 4573 if self._match(TokenType.COMMA): 4574 offset = expression 4575 expression = self._parse_term() 4576 else: 4577 offset = None 4578 4579 limit_exp = self.expression( 4580 exp.Limit, 4581 this=this, 4582 expression=expression, 4583 offset=offset, 4584 comments=comments, 4585 limit_options=limit_options, 4586 expressions=self._parse_limit_by(), 4587 ) 4588 4589 return limit_exp 4590 4591 if self._match(TokenType.FETCH): 4592 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4593 direction = self._prev.text.upper() if direction else "FIRST" 4594 4595 count = self._parse_field(tokens=self.FETCH_TOKENS) 4596 4597 return self.expression( 4598 exp.Fetch, 4599 direction=direction, 4600 count=count, 4601 limit_options=self._parse_limit_options(), 4602 ) 4603 4604 return this 4605 4606 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4607 if not self._match(TokenType.OFFSET): 4608 return this 4609 4610 count = self._parse_term() 4611 self._match_set((TokenType.ROW, TokenType.ROWS)) 4612 4613 return self.expression( 4614 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4615 ) 4616 4617 def _can_parse_limit_or_offset(self) -> bool: 4618 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4619 return False 4620 4621 index = self._index 4622 result = bool( 4623 self._try_parse(self._parse_limit, retreat=True) 4624 or self._try_parse(self._parse_offset, retreat=True) 4625 ) 4626 self._retreat(index) 4627 return result 4628 4629 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4630 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4631 4632 def _parse_locks(self) -> t.List[exp.Lock]: 4633 locks = [] 4634 while True: 4635 if self._match_text_seq("FOR", "UPDATE"): 4636 update = True 4637 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4638 "LOCK", "IN", "SHARE", "MODE" 4639 ): 4640 update = False 4641 else: 4642 break 
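            # Illustrative inputs (not part of the original source) consumed by this loop:
            #   SELECT * FROM t FOR UPDATE NOWAIT
            #   SELECT * FROM t FOR SHARE OF t SKIP LOCKED
            #   SELECT * FROM t LOCK IN SHARE MODE  (MySQL)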
4643 4644 expressions = None 4645 if self._match_text_seq("OF"): 4646 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4647 4648 wait: t.Optional[bool | exp.Expression] = None 4649 if self._match_text_seq("NOWAIT"): 4650 wait = True 4651 elif self._match_text_seq("WAIT"): 4652 wait = self._parse_primary() 4653 elif self._match_text_seq("SKIP", "LOCKED"): 4654 wait = False 4655 4656 locks.append( 4657 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4658 ) 4659 4660 return locks 4661 4662 def parse_set_operation(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4663 start = self._index 4664 _, side_token, kind_token = self._parse_join_parts() 4665 4666 side = side_token.text if side_token else None 4667 kind = kind_token.text if kind_token else None 4668 4669 if not self._match_set(self.SET_OPERATIONS): 4670 self._retreat(start) 4671 return None 4672 4673 token_type = self._prev.token_type 4674 4675 if token_type == TokenType.UNION: 4676 operation: t.Type[exp.SetOperation] = exp.Union 4677 elif token_type == TokenType.EXCEPT: 4678 operation = exp.Except 4679 else: 4680 operation = exp.Intersect 4681 4682 comments = self._prev.comments 4683 4684 if self._match(TokenType.DISTINCT): 4685 distinct: t.Optional[bool] = True 4686 elif self._match(TokenType.ALL): 4687 distinct = False 4688 else: 4689 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4690 if distinct is None: 4691 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4692 4693 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4694 "STRICT", "CORRESPONDING" 4695 ) 4696 if self._match_text_seq("CORRESPONDING"): 4697 by_name = True 4698 if not side and not kind: 4699 kind = "INNER" 4700 4701 on_column_list = None 4702 if by_name and self._match_texts(("ON", "BY")): 4703 on_column_list = self._parse_wrapped_csv(self._parse_column) 4704 4705 expression = self._parse_select(nested=True, parse_set_operation=False) 4706 4707 return self.expression( 4708 operation, 4709 comments=comments, 4710 this=this, 4711 distinct=distinct, 4712 by_name=by_name, 4713 expression=expression, 4714 side=side, 4715 kind=kind, 4716 on=on_column_list, 4717 ) 4718 4719 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4720 while True: 4721 setop = self.parse_set_operation(this) 4722 if not setop: 4723 break 4724 this = setop 4725 4726 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4727 expression = this.expression 4728 4729 if expression: 4730 for arg in self.SET_OP_MODIFIERS: 4731 expr = expression.args.get(arg) 4732 if expr: 4733 this.set(arg, expr.pop()) 4734 4735 return this 4736 4737 def _parse_expression(self) -> t.Optional[exp.Expression]: 4738 return self._parse_alias(self._parse_assignment()) 4739 4740 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4741 this = self._parse_disjunction() 4742 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4743 # This allows us to parse <non-identifier token> := <expr> 4744 this = exp.column( 4745 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4746 ) 4747 4748 while self._match_set(self.ASSIGNMENT): 4749 if isinstance(this, exp.Column) and len(this.parts) == 1: 4750 this = this.this 4751 4752 this = self.expression( 4753 self.ASSIGNMENT[self._prev.token_type], 4754 this=this, 4755 comments=self._prev_comments, 4756 expression=self._parse_assignment(), 4757 ) 4758 4759 
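        # Illustrative usage (not part of the original source), assuming the public
        # sqlglot API: the cascade below (_parse_disjunction -> _parse_conjunction ->
        # _parse_equality -> _parse_comparison -> ... -> _parse_unary) encodes operator
        # precedence, so e.g.
        #
        #   sqlglot.parse_one("SELECT a OR b AND c = 1 + 2 * 3")
        #
        # groups as a OR (b AND (c = (1 + (2 * 3)))).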
return this 4760 4761 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4762 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4763 4764 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4765 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4766 4767 def _parse_equality(self) -> t.Optional[exp.Expression]: 4768 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4769 4770 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4771 return self._parse_tokens(self._parse_range, self.COMPARISON) 4772 4773 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4774 this = this or self._parse_bitwise() 4775 negate = self._match(TokenType.NOT) 4776 4777 if self._match_set(self.RANGE_PARSERS): 4778 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4779 if not expression: 4780 return this 4781 4782 this = expression 4783 elif self._match(TokenType.ISNULL): 4784 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4785 4786 # Postgres supports ISNULL and NOTNULL for conditions. 4787 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4788 if self._match(TokenType.NOTNULL): 4789 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4790 this = self.expression(exp.Not, this=this) 4791 4792 if negate: 4793 this = self._negate_range(this) 4794 4795 if self._match(TokenType.IS): 4796 this = self._parse_is(this) 4797 4798 return this 4799 4800 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4801 if not this: 4802 return this 4803 4804 return self.expression(exp.Not, this=this) 4805 4806 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4807 index = self._index - 1 4808 negate = self._match(TokenType.NOT) 4809 4810 if self._match_text_seq("DISTINCT", "FROM"): 4811 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4812 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4813 4814 if self._match(TokenType.JSON): 4815 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4816 4817 if self._match_text_seq("WITH"): 4818 _with = True 4819 elif self._match_text_seq("WITHOUT"): 4820 _with = False 4821 else: 4822 _with = None 4823 4824 unique = self._match(TokenType.UNIQUE) 4825 self._match_text_seq("KEYS") 4826 expression: t.Optional[exp.Expression] = self.expression( 4827 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4828 ) 4829 else: 4830 expression = self._parse_primary() or self._parse_null() 4831 if not expression: 4832 self._retreat(index) 4833 return None 4834 4835 this = self.expression(exp.Is, this=this, expression=expression) 4836 return self.expression(exp.Not, this=this) if negate else this 4837 4838 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4839 unnest = self._parse_unnest(with_alias=False) 4840 if unnest: 4841 this = self.expression(exp.In, this=this, unnest=unnest) 4842 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4843 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4844 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4845 4846 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4847 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4848 else: 4849 this = self.expression(exp.In, this=this, expressions=expressions) 4850 4851 if 
matched_l_paren: 4852 self._match_r_paren(this) 4853 elif not self._match(TokenType.R_BRACKET, expression=this): 4854 self.raise_error("Expecting ]") 4855 else: 4856 this = self.expression(exp.In, this=this, field=self._parse_column()) 4857 4858 return this 4859 4860 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4861 low = self._parse_bitwise() 4862 self._match(TokenType.AND) 4863 high = self._parse_bitwise() 4864 return self.expression(exp.Between, this=this, low=low, high=high) 4865 4866 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4867 if not self._match(TokenType.ESCAPE): 4868 return this 4869 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4870 4871 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4872 index = self._index 4873 4874 if not self._match(TokenType.INTERVAL) and match_interval: 4875 return None 4876 4877 if self._match(TokenType.STRING, advance=False): 4878 this = self._parse_primary() 4879 else: 4880 this = self._parse_term() 4881 4882 if not this or ( 4883 isinstance(this, exp.Column) 4884 and not this.table 4885 and not this.this.quoted 4886 and this.name.upper() == "IS" 4887 ): 4888 self._retreat(index) 4889 return None 4890 4891 unit = self._parse_function() or ( 4892 not self._match(TokenType.ALIAS, advance=False) 4893 and self._parse_var(any_token=True, upper=True) 4894 ) 4895 4896 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4897 # each INTERVAL expression into this canonical form so it's easy to transpile 4898 if this and this.is_number: 4899 this = exp.Literal.string(this.to_py()) 4900 elif this and this.is_string: 4901 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4902 if parts and unit: 4903 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4904 unit = None 4905 self._retreat(self._index - 1) 4906 4907 if len(parts) == 1: 4908 this = exp.Literal.string(parts[0][0]) 4909 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4910 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4911 unit = self.expression( 4912 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4913 ) 4914 4915 interval = self.expression(exp.Interval, this=this, unit=unit) 4916 4917 index = self._index 4918 self._match(TokenType.PLUS) 4919 4920 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 4921 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4922 return self.expression( 4923 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4924 ) 4925 4926 self._retreat(index) 4927 return interval 4928 4929 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4930 this = self._parse_term() 4931 4932 while True: 4933 if self._match_set(self.BITWISE): 4934 this = self.expression( 4935 self.BITWISE[self._prev.token_type], 4936 this=this, 4937 expression=self._parse_term(), 4938 ) 4939 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4940 this = self.expression( 4941 exp.DPipe, 4942 this=this, 4943 expression=self._parse_term(), 4944 safe=not self.dialect.STRICT_STRING_CONCAT, 4945 ) 4946 elif self._match(TokenType.DQMARK): 4947 this = self.expression( 4948 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4949 ) 4950 elif self._match_pair(TokenType.LT, TokenType.LT): 4951 this = self.expression( 4952 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4953 ) 4954 elif self._match_pair(TokenType.GT, TokenType.GT): 4955 this = self.expression( 4956 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4957 ) 4958 else: 4959 break 4960 4961 return this 4962 4963 def _parse_term(self) -> t.Optional[exp.Expression]: 4964 this = self._parse_factor() 4965 4966 while self._match_set(self.TERM): 4967 klass = self.TERM[self._prev.token_type] 4968 comments = self._prev_comments 4969 expression = self._parse_factor() 4970 4971 this = self.expression(klass, this=this, comments=comments, expression=expression) 4972 4973 if isinstance(this, exp.Collate): 4974 expr = this.expression 4975 4976 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 4977 # fallback to Identifier / Var 4978 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4979 ident = expr.this 4980 if isinstance(ident, exp.Identifier): 4981 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4982 4983 return this 4984 4985 def _parse_factor(self) -> t.Optional[exp.Expression]: 4986 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4987 this = parse_method() 4988 4989 while self._match_set(self.FACTOR): 4990 klass = self.FACTOR[self._prev.token_type] 4991 comments = self._prev_comments 4992 expression = parse_method() 4993 4994 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4995 self._retreat(self._index - 1) 4996 return this 4997 4998 this = self.expression(klass, this=this, comments=comments, expression=expression) 4999 5000 if isinstance(this, exp.Div): 5001 this.args["typed"] = self.dialect.TYPED_DIVISION 5002 this.args["safe"] = self.dialect.SAFE_DIVISION 5003 5004 return this 5005 5006 def _parse_exponent(self) -> t.Optional[exp.Expression]: 5007 return self._parse_tokens(self._parse_unary, self.EXPONENT) 5008 5009 def _parse_unary(self) -> t.Optional[exp.Expression]: 5010 if self._match_set(self.UNARY_PARSERS): 5011 return self.UNARY_PARSERS[self._prev.token_type](self) 5012 return self._parse_at_time_zone(self._parse_type()) 5013 5014 def _parse_type( 5015 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5016 ) -> t.Optional[exp.Expression]: 5017 interval = parse_interval and self._parse_interval() 5018 if interval: 5019 return interval 5020 5021 index = self._index 5022 data_type = self._parse_types(check_func=True, allow_identifiers=False) 5023 
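        # Illustrative inputs (not part of the original source) handled below:
        #   DATE '2020-01-01'  -> type followed by a literal, via TYPE_LITERAL_PARSERS
        #   STRUCT<a INT>(1)   -> BigQuery inline constructor (see the note below)
        #   plain identifiers  -> retreat and fall through to column parsing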
5024 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 5025 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 5026 if isinstance(data_type, exp.Cast): 5027 # This constructor can contain ops directly after it, for instance struct unnesting: 5028 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 5029 return self._parse_column_ops(data_type) 5030 5031 if data_type: 5032 index2 = self._index 5033 this = self._parse_primary() 5034 5035 if isinstance(this, exp.Literal): 5036 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 5037 if parser: 5038 return parser(self, this, data_type) 5039 5040 return self.expression(exp.Cast, this=this, to=data_type) 5041 5042 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 5043 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 5044 # 5045 # If the index difference here is greater than 1, that means the parser itself must have 5046 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 5047 # 5048 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 5049 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 5050 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 5051 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 5052 # 5053 # In these cases, we don't really want to return the converted type, but instead retreat 5054 # and try to parse a Column or Identifier in the section below. 5055 if data_type.expressions and index2 - index > 1: 5056 self._retreat(index2) 5057 return self._parse_column_ops(data_type) 5058 5059 self._retreat(index) 5060 5061 if fallback_to_identifier: 5062 return self._parse_id_var() 5063 5064 this = self._parse_column() 5065 return this and self._parse_column_ops(this) 5066 5067 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 5068 this = self._parse_type() 5069 if not this: 5070 return None 5071 5072 if isinstance(this, exp.Column) and not this.table: 5073 this = exp.var(this.name.upper()) 5074 5075 return self.expression( 5076 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 5077 ) 5078 5079 def _parse_types( 5080 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 5081 ) -> t.Optional[exp.Expression]: 5082 index = self._index 5083 5084 this: t.Optional[exp.Expression] = None 5085 prefix = self._match_text_seq("SYSUDTLIB", ".") 5086 5087 if not self._match_set(self.TYPE_TOKENS): 5088 identifier = allow_identifiers and self._parse_id_var( 5089 any_token=False, tokens=(TokenType.VAR,) 5090 ) 5091 if isinstance(identifier, exp.Identifier): 5092 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 5093 5094 if len(tokens) != 1: 5095 self.raise_error("Unexpected identifier", self._prev) 5096 5097 if tokens[0].token_type in self.TYPE_TOKENS: 5098 self._prev = tokens[0] 5099 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 5100 type_name = identifier.name 5101 5102 while self._match(TokenType.DOT): 5103 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5104 5105 this = exp.DataType.build(type_name, udt=True) 5106 else: 5107 self._retreat(self._index - 1) 5108 return None 5109 else: 5110 return None 5111 5112 type_token = self._prev.token_type 5113 5114 if type_token == TokenType.PSEUDO_TYPE:

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier, exp.Identifier):
                tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect))

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    this = exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        # https://materialize.com/docs/sql/types/map/
        if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
            key_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.FARROW):
                self._retreat(index)
                return None

            value_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.R_BRACKET):
                self._retreat(index)
                return None

            return exp.DataType(
                this=exp.DataType.Type.MAP,
                expressions=[key_type, value_type],
                nested=True,
                prefix=prefix,
            )
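
        # Illustrative note (not upstream): per the Materialize MAP syntax handled above, an
        # input such as MAP[TEXT => INT] parses into
        #   DataType(this=Type.MAP, expressions=[DataType(TEXT), DataType(INT)], nested=True).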

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                if type_token == TokenType.NULLABLE and len(expressions) == 1:
                    this = expressions[0]
                    this.set("nullable", True)
                    self._match_r_paren()
                    return this
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR, TokenType.ANY)
                )
                if not func_or_ident:
                    return None
                expressions = [func_or_ident]
                if self._match(TokenType.COMMA):
                    expressions.extend(
                        self._parse_csv(
                            lambda: self._parse_types(
                                check_func=check_func,
                                schema=schema,
                                allow_identifiers=allow_identifiers,
                            )
                        )
                    )
            else:
                expressions = self._parse_csv(self._parse_type_size)

                # https://docs.snowflake.com/en/sql-reference/data-types-vector
                if type_token == TokenType.VECTOR and len(expressions) == 2:
                    expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_assignment)
                if not values and is_struct:
                    values = None
                    self._retreat(self._index - 1)
                else:
                    self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
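
        # Illustrative note (not upstream): per the INTERVAL branch above, an input such as
        # INTERVAL YEAR TO MONTH parses into a DataType wrapping
        #   Interval(unit=IntervalSpan(this=YEAR, expression=MONTH)).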

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                prefix=prefix,
            )

            # Empty arrays/structs are allowed
            if values is not None:
                cls = exp.Struct if is_struct else exp.Array
                this = exp.cast(cls(expressions=values), this, copy=False)

        elif expressions:
            this.set("expressions", expressions)

        # https://materialize.com/docs/sql/types/list/#type-name
        while self._match(TokenType.LIST):
            this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True)

        index = self._index

        # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3]
        matched_array = self._match(TokenType.ARRAY)

        while self._curr:
            datatype_token = self._prev.token_type
            matched_l_bracket = self._match(TokenType.L_BRACKET)

            if (not matched_l_bracket and not matched_array) or (
                datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET)
            ):
                # Postgres allows casting empty arrays such as ARRAY[]::INT[],
                # not to be confused with the fixed size array parsing
                break

            matched_array = False
            values = self._parse_csv(self._parse_assignment) or None
            if (
                values
                and not schema
                and (
                    not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY
                )
            ):
                # Retreating here means that we should not parse the following values as part of
                # the data type, e.g. in DuckDB ARRAY[1] should retreat and instead be parsed
                # into exp.Array, in contrast to INT[x][y] which denotes a fixed-size array type
                self._retreat(index)
                break

            this = exp.DataType(
                this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True
            )
            self._match(TokenType.R_BRACKET)

        if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type):
            converter = self.TYPE_CONVERTERS.get(this.this)
            if converter:
                this = converter(t.cast(exp.DataType, this))

        return this

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if (
            self._curr
            and self._next
            and self._curr.token_type in self.TYPE_TOKENS
            and self._next.token_type in self.TYPE_TOKENS
        ):
            # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is
            # also a type token. Without this, the list will be parsed as a type and we'll
            # eventually crash
            this = self._parse_id_var()
        else:
            this = (
                self._parse_type(parse_interval=False, fallback_to_identifier=True)
                or self._parse_id_var()
            )

        self._match(TokenType.COLON)

        if (
            type_required
            and not isinstance(this, exp.DataType)
            and not self._match_set(self.TYPE_TOKENS, advance=False)
        ):
            self._retreat(index)
            return self._parse_types()

        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_column_reference()
        column = self._parse_column_ops(this) if this else self._parse_bracket(this)

        if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column:
            column.set("join_mark", self._match(TokenType.JOIN_MARKER))

        return column

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            this = self._parse_id_var()

        if isinstance(this, exp.Identifier):
            # We bubble up comments from the Identifier to the Column
            this = self.expression(exp.Column, comments=this.pop_comments(), this=this)

        return this
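
    # Illustrative note (not upstream): in dialects where the colon is a variant extract
    # (e.g. Snowflake), an input like col:a.b::INT is handled below roughly as
    #   Cast(this=JSONExtract(this=col, expression=<path 'a.b'>, variant_extract=True), to=INT)
    # i.e. the :: cast is peeled off the JSON path and reapplied to the whole extract.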
5407 ) 5408 end_token = self._tokens[start_index + dcolon_offset - 1] 5409 else: 5410 end_token = self._prev 5411 5412 if path: 5413 # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as 5414 # it'll roundtrip to a string literal in GET_PATH 5415 if isinstance(path, exp.Identifier) and path.quoted: 5416 escape = True 5417 5418 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5419 5420 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5421 # Databricks transforms it back to the colon/dot notation 5422 if json_path: 5423 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5424 5425 if json_path_expr: 5426 json_path_expr.set("escape", escape) 5427 5428 this = self.expression( 5429 exp.JSONExtract, 5430 this=this, 5431 expression=json_path_expr, 5432 variant_extract=True, 5433 ) 5434 5435 while casts: 5436 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5437 5438 return this 5439 5440 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5441 return self._parse_types() 5442 5443 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5444 this = self._parse_bracket(this) 5445 5446 while self._match_set(self.COLUMN_OPERATORS): 5447 op_token = self._prev.token_type 5448 op = self.COLUMN_OPERATORS.get(op_token) 5449 5450 if op_token in (TokenType.DCOLON, TokenType.DOTCOLON): 5451 field = self._parse_dcolon() 5452 if not field: 5453 self.raise_error("Expected type") 5454 elif op and self._curr: 5455 field = self._parse_column_reference() or self._parse_bracket() 5456 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5457 field = self._parse_column_ops(field) 5458 else: 5459 field = self._parse_field(any_token=True, anonymous_func=True) 5460 5461 if isinstance(field, (exp.Func, exp.Window)) and this: 5462 # BQ & snowflake allow function calls like x.y.count(...), SAFE.SUBSTR(...) 

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token in (TokenType.DCOLON, TokenType.DOTCOLON):
                field = self._parse_dcolon()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference() or self._parse_bracket()
                if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False):
                    field = self._parse_column_ops(field)
            else:
                field = self._parse_field(any_token=True, anonymous_func=True)

            if isinstance(field, (exp.Func, exp.Window)) and this:
                # BigQuery & Snowflake allow function calls like x.y.count(...), SAFE.SUBSTR(...) etc
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = exp.replace_tree(
                    this,
                    lambda n: (
                        self.expression(exp.Dot, this=n.args.get("table"), expression=n.this)
                        if n.table
                        else n.this
                    )
                    if isinstance(n, exp.Column)
                    else n,
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                this = self.expression(
                    exp.Column,
                    comments=this.comments,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            elif isinstance(field, exp.Window):
                # Move the exp.Dot's to the window's function
                window_func = self.expression(exp.Dot, this=this, expression=field.this)
                field.set("this", window_func)
                this = field
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            if field and field.comments:
                t.cast(exp.Expression, this).add_comments(field.pop_comments())

            this = self._parse_bracket(this)

        return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if not this and self._match(TokenType.R_PAREN, advance=False):
                this = self.expression(exp.Tuple)
            elif isinstance(this, exp.UNWRAPPED_QUERIES):
                this = self._parse_subquery(this=this, parse_alias=False)
            elif isinstance(this, exp.Subquery):
                this = self._parse_subquery(
                    this=self._parse_set_operations(this), parse_alias=False
                )
            elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        if anonymous_func:
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
                anonymous=anonymous_func, any_token=any_token
            )
        return field or self._parse_id_var(any_token=any_token, tokens=tokens)
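
    # Illustrative note (not upstream): the {fn ...} handling below means ODBC escape syntax
    # such as {fn CONCAT('a', 'b')} is parsed as the plain inner call CONCAT('a', 'b'),
    # with the surrounding braces consumed.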

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(
                    subquery_predicate, comments=comments, this=self._parse_select()
                )
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            known_function = function and not anonymous

            alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            post_func_comments = self._curr and self._curr.comments
            if known_function and post_func_comments:
                # If the user-inputted comment "/* sqlglot.anonymous */" is following the function
                # call we'll construct it as exp.Anonymous, even if it's "known"
                if any(
                    comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS)
                    for comment in post_func_comments
                ):
                    known_function = False

            if alias and known_function:
                args = self._kv_to_prop_eq(args)

            if known_function:
                func_builder = t.cast(t.Callable, function)

                if "dialect" in func_builder.__code__.co_varnames:
                    func = func_builder(args, dialect=self.dialect)
                else:
                    func = func_builder(args)

                func = self.validate_expression(func, args)
                if self.dialect.PRESERVE_ORIGINAL_NAMES:
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True)
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)
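
    # Illustrative note (not upstream): per the comment handling above, a trailing
    # /* sqlglot.anonymous */ comment forces a known function to be kept as exp.Anonymous,
    # e.g. SELECT LOWER(x) /* sqlglot.anonymous */ round-trips as written instead of being
    # normalized through the exp.Lower builder.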

    def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression:
        return expression

    def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
        transformed = []

        for index, e in enumerate(expressions):
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression
                    )

                if isinstance(e.this, exp.Column):
                    e.this.replace(e.this.this)
            else:
                e = self._to_prop_eq(e, index)

            transformed.append(e)

        return transformed

    def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_statement()

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(this=self._parse_id_var(), computed_column=False)

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_table_parts(schema=True)

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var()

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_lambda_arg()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_assignment)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )
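
    # Illustrative note (not upstream): _parse_lambda handles higher-order function arguments,
    # so something like (x, y) -> x + y inside e.g. a TRANSFORM or FILTER call is dispatched
    # via LAMBDAS into an exp.Lambda over the two identifiers; plain arguments fall through to
    # the select-or-expression path below it.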

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this
        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(
        self, this: t.Optional[exp.Expression], computed_column: bool = True
    ) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        if not computed_column:
            self._match(TokenType.ALIAS)

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraint_kind = exp.ComputedColumnConstraint(
                this=self._parse_assignment(),
                persisted=persisted or self._match_text_seq("PERSISTED"),
                not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
            )
            constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind))
        elif (
            kind
            and self._match(TokenType.ALIAS, advance=False)
            and (
                not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
                or (self._next and self._next.token_type == TokenType.L_PAREN)
            )
        ):
            self._advance()
            constraints.append(
                self.expression(
                    exp.ColumnConstraint,
                    kind=exp.TransformColumnConstraint(this=self._parse_disjunction()),
                )
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(
                start=start, increment=increment, this=False
            )

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))
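
    # Illustrative note (not upstream): per _parse_auto_increment above, a column defined with
    # AUTOINCREMENT(100, 10) yields GeneratedAsIdentityColumnConstraint(start=100, increment=10),
    # while a bare AUTO_INCREMENT falls back to AutoIncrementColumnConstraint().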

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)

        # Unconsume the `NOT` token
        self._retreat(self._index - 1)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        this = self._match(TokenType.CONSTRAINT) and self._parse_id_var()

        procedure_option_follows = (
            self._match(TokenType.WITH, advance=False)
            and self._next
            and self._next.text.upper() in self.PROCEDURE_OPTIONS
        )

        if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )
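
    # Illustrative note (not upstream): a named constraint such as
    # CONSTRAINT pk PRIMARY KEY (id) parses into exp.Constraint(this=pk, expressions=[...]),
    # while an unnamed one dispatches straight through CONSTRAINT_PARSERS.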

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique_key(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var(any_token=False)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"),
            this=self._parse_schema(self._parse_unique_key()),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
            options=self._parse_key_constraint_options(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            else:
                var = self._parse_var_from_options(
                    self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False
                )
                if not var:
                    break
                options.append(var.name)

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)
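
    # Illustrative note (not upstream): _parse_key_constraint_options collects plain strings,
    # e.g. REFERENCES t (id) ON DELETE CASCADE yields options=["ON DELETE CASCADE"] on the
    # resulting exp.Reference.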

    def _parse_foreign_key(self) -> exp.ForeignKey:
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        on_options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            on_options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            options=self._parse_key_constraint_options(),
            **on_options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_ordered() or self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.PrimaryKeyColumnConstraint,
                desc=desc,
                options=self._parse_key_constraint_options(),
            )

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))
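
    # Illustrative note (not upstream): a table-level PRIMARY KEY (a, b DESC) is parsed into an
    # exp.PrimaryKey whose parts go through _parse_ordered (preserving the DESC), whereas a bare
    # column-level PRIMARY KEY becomes a PrimaryKeyColumnConstraint.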

    def _parse_odbc_datetime_literal(self) -> exp.Expression:
        """
        Parses a datetime column in ODBC format. We parse the column into the corresponding
        types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the
        same as we did for `DATE('yyyy-mm-dd')`.

        Reference:
        https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals
        """
        self._match(TokenType.VAR)
        exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()]
        expression = self.expression(exp_class=exp_class, this=self._parse_string())
        if not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")
        return expression

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        if (
            bracket_kind == TokenType.L_BRACE
            and self._curr
            and self._curr.token_type == TokenType.VAR
            and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS
        ):
            return self._parse_odbc_datetime_literal()

        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this:
            this = build_array_constructor(
                exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect
            )
        else:
            constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
            if constructor_type:
                return build_array_constructor(
                    constructor_type,
                    args=expressions,
                    bracket_kind=bracket_kind,
                    dialect=self.dialect,
                )

            expressions = apply_index_offset(
                this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect
            )
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_assignment())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_assignment()

        while self._match(TokenType.WHEN):
            this = self._parse_assignment()
            self._match(TokenType.THEN)
            then = self._parse_assignment()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_assignment()

        if not self._match(TokenType.END):
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )
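
    # Illustrative note (not upstream): CASE WHEN a THEN 1 ELSE 0 END parses into
    # exp.Case(ifs=[If(this=a, true=1)], default=0); the simple form CASE x WHEN ... also lands
    # here, with x carried in the `this` arg.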

    def _parse_if(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(
                lambda: self._parse_alias(self._parse_assignment(), explicit=True)
            )
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                return self._parse_as_command(self._prev)

            condition = self._parse_assignment()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_assignment()
            false = self._parse_assignment() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract | exp.Anonymous:
        this = self._parse_function() or self._parse_var_or_string(upper=True)

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_gap_fill(self) -> exp.GapFill:
        self._match(TokenType.TABLE)
        this = self._parse_table()

        self._match(TokenType.COMMA)
        args = [this, *self._parse_csv(self._parse_lambda)]

        gap_fill = exp.GapFill.from_arg_list(args)
        return self.validate_expression(gap_fill, args)
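
    # Illustrative note (not upstream): EXTRACT(YEAR FROM ts) parses into
    # exp.Extract(this=YEAR, expression=ts); the comma form EXTRACT(YEAR, ts) used by some
    # dialects produces the same node.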

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        this = self._parse_assignment()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        default = self._match(TokenType.DEFAULT)
        if default:
            default = self._parse_bitwise()
            self._match_text_seq("ON", "CONVERSION", "ERROR")

        if self._match_set((TokenType.FORMAT, TokenType.COMMA)):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                    safe=safe,
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
            default=default,
        )
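
    # Illustrative note (not upstream): per the FORMAT branch above, something like
    # CAST('01-2024' AS DATE FORMAT 'MM-YYYY') is rewritten while parsing into a StrToDate
    # whose format goes through the dialect's FORMAT/TIME mappings, instead of a plain Cast.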

    def _parse_string_agg(self) -> exp.GroupConcat:
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_assignment()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_assignment))
        else:
            args = self._parse_csv(self._parse_assignment)  # type: ignore

        if self._match_text_seq("ON", "OVERFLOW"):
            # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior])
            if self._match_text_seq("ERROR"):
                on_overflow: t.Optional[exp.Expression] = exp.var("ERROR")
            else:
                self._match_text_seq("TRUNCATE")
                on_overflow = self.expression(
                    exp.OverflowTruncateBehavior,
                    this=self._parse_string(),
                    with_count=(
                        self._match_text_seq("WITH", "COUNT")
                        or not self._match_text_seq("WITHOUT", "COUNT")
                    ),
                )
        else:
            on_overflow = None

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            # The order is parsed through `this` as a canonicalization for WITHIN GROUPs
            args[0] = self._parse_limit(this=self._parse_order(this=args[0]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        # The corresponding match_r_paren will be called in parse_function (caller)
        self._match_l_paren()

        return self.expression(
            exp.GroupConcat,
            this=self._parse_order(this=seq_get(args, 0)),
            separator=seq_get(args, 1),
            on_overflow=on_overflow,
        )

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_xml_table(self) -> exp.XMLTable:
        namespaces = None
        passing = None
        columns = None

        if self._match_text_seq("XMLNAMESPACES", "("):
            namespaces = self._parse_xml_namespace()
            self._match_text_seq(")", ",")

        this = self._parse_string()

        if self._match_text_seq("PASSING"):
            # The BY VALUE keywords are optional and are provided for semantic clarity
            self._match_text_seq("BY", "VALUE")
            passing = self._parse_csv(self._parse_column)

        by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF")

        if self._match_text_seq("COLUMNS"):
            columns = self._parse_csv(self._parse_field_def)

        return self.expression(
            exp.XMLTable,
            this=this,
            namespaces=namespaces,
            passing=passing,
            columns=columns,
            by_ref=by_ref,
        )

    def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]:
        namespaces = []

        while True:
            if self._match(TokenType.DEFAULT):
                uri = self._parse_string()
            else:
                uri = self._parse_alias(self._parse_string())
            namespaces.append(self.expression(exp.XMLNamespace, this=uri))
            if not self._match(TokenType.COMMA):
                break

        return namespaces
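
    # Illustrative note (not upstream): per the DECODE docstring below, an input such as
    # DECODE(x, 1, 'one', 'other') is expanded while parsing into
    # CASE WHEN x = 1 THEN 'one' ELSE 'other' END, with NULL searches compared via IS NULL.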

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_assignment)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_condition(self) -> t.Optional[exp.OnCondition]:
        # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS)
        if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR:
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
        else:
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)

        null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS)

        if not empty and not error and not null:
            return None

        return self.expression(
            exp.OnCondition,
            empty=empty,
            error=error,
            null=null,
        )

    def _parse_on_handling(
        self, on: str, *values: str
    ) -> t.Optional[str] | t.Optional[exp.Expression]:
        # Parses the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        index = self._index
        if self._match(TokenType.DEFAULT):
            default_value = self._parse_bitwise()
            if self._match_text_seq("ON", on):
                return default_value

            self._retreat(index)

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...
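
    # Illustrative note (not upstream): JSON_OBJECT('a' VALUE 1 ABSENT ON NULL) parses below
    # into exp.JSONObject(expressions=[JSONKeyValue('a', 1)], null_handling="ABSENT ON NULL").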

    def _parse_json_object(self, agg=False):
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )
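
    # Illustrative note (not upstream): MySQL's MATCH(col) AGAINST('query' IN BOOLEAN MODE)
    # parses into exp.MatchAgainst(this='query', expressions=[col], modifier="IN BOOLEAN MODE").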

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            haystack = seq_get(args, 1)
            needle = seq_get(args, 0)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
        if self._match(TokenType.FOR):
            if len(args) == 1:
                args.append(exp.Literal.number(1))
            args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html
        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)
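
    # Illustrative note (not upstream): per the Postgres form above, SUBSTRING(s FROM 2 FOR 3)
    # collects its arguments into the same shape as SUBSTRING(s, 2, 3) before building
    # exp.Substring, and TRIM(LEADING 'x' FROM s) swaps operands so `this` is the string.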
6781 return self.expression(exp.IgnoreNulls, this=this) 6782 if self._match_text_seq("RESPECT", "NULLS"): 6783 return self.expression(exp.RespectNulls, this=this) 6784 return this 6785 6786 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6787 if self._match(TokenType.HAVING): 6788 self._match_texts(("MAX", "MIN")) 6789 max = self._prev.text.upper() != "MIN" 6790 return self.expression( 6791 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6792 ) 6793 6794 return this 6795 6796 def _parse_window( 6797 self, this: t.Optional[exp.Expression], alias: bool = False 6798 ) -> t.Optional[exp.Expression]: 6799 func = this 6800 comments = func.comments if isinstance(func, exp.Expression) else None 6801 6802 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6803 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6804 if self._match_text_seq("WITHIN", "GROUP"): 6805 order = self._parse_wrapped(self._parse_order) 6806 this = self.expression(exp.WithinGroup, this=this, expression=order) 6807 6808 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6809 self._match(TokenType.WHERE) 6810 this = self.expression( 6811 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6812 ) 6813 self._match_r_paren() 6814 6815 # The SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER; 6816 # some dialects choose to implement it and some do not. 6817 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6818 6819 # The code above in _parse_lambda handles 6820 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6821 6822 # The code below handles 6823 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6824 6825 # Oracle allows both formats 6826 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6827 # and Snowflake chose to do the same for familiarity: 6828 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6829 if isinstance(this, exp.AggFunc): 6830 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6831 6832 if ignore_respect and ignore_respect is not this: 6833 ignore_respect.replace(ignore_respect.this) 6834 this = self.expression(ignore_respect.__class__, this=this) 6835 6836 this = self._parse_respect_or_ignore_nulls(this) 6837 6838 # BigQuery named windows, e.g. SELECT ... FROM t WINDOW x AS (PARTITION BY ...)
6839 if alias: 6840 over = None 6841 self._match(TokenType.ALIAS) 6842 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6843 return this 6844 else: 6845 over = self._prev.text.upper() 6846 6847 if comments and isinstance(func, exp.Expression): 6848 func.pop_comments() 6849 6850 if not self._match(TokenType.L_PAREN): 6851 return self.expression( 6852 exp.Window, 6853 comments=comments, 6854 this=this, 6855 alias=self._parse_id_var(False), 6856 over=over, 6857 ) 6858 6859 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6860 6861 first = self._match(TokenType.FIRST) 6862 if self._match_text_seq("LAST"): 6863 first = False 6864 6865 partition, order = self._parse_partition_and_order() 6866 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6867 6868 if kind: 6869 self._match(TokenType.BETWEEN) 6870 start = self._parse_window_spec() 6871 self._match(TokenType.AND) 6872 end = self._parse_window_spec() 6873 6874 spec = self.expression( 6875 exp.WindowSpec, 6876 kind=kind, 6877 start=start["value"], 6878 start_side=start["side"], 6879 end=end["value"], 6880 end_side=end["side"], 6881 ) 6882 else: 6883 spec = None 6884 6885 self._match_r_paren() 6886 6887 window = self.expression( 6888 exp.Window, 6889 comments=comments, 6890 this=this, 6891 partition_by=partition, 6892 order=order, 6893 spec=spec, 6894 alias=window_alias, 6895 over=over, 6896 first=first, 6897 ) 6898 6899 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6900 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6901 return self._parse_window(window, alias=alias) 6902 6903 return window 6904 6905 def _parse_partition_and_order( 6906 self, 6907 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6908 return self._parse_partition_by(), self._parse_order() 6909 6910 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6911 self._match(TokenType.BETWEEN) 6912 6913 return { 6914 "value": ( 6915 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6916 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6917 or self._parse_bitwise() 6918 ), 6919 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6920 } 6921 6922 def _parse_alias( 6923 self, this: t.Optional[exp.Expression], explicit: bool = False 6924 ) -> t.Optional[exp.Expression]: 6925 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 6926 # so this section tries to parse the clause version and if it fails, it treats the token 6927 # as an identifier (alias) 6928 if self._can_parse_limit_or_offset(): 6929 return this 6930 6931 any_token = self._match(TokenType.ALIAS) 6932 comments = self._prev_comments or [] 6933 6934 if explicit and not any_token: 6935 return this 6936 6937 if self._match(TokenType.L_PAREN): 6938 aliases = self.expression( 6939 exp.Aliases, 6940 comments=comments, 6941 this=this, 6942 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6943 ) 6944 self._match_r_paren(aliases) 6945 return aliases 6946 6947 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6948 self.STRING_ALIASES and self._parse_string_as_identifier() 6949 ) 6950 6951 if alias: 6952 comments.extend(alias.pop_comments()) 6953 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6954 column = this.this 6955 6956 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6957 if not this.comments and column and 
column.comments: 6958 this.comments = column.pop_comments() 6959 6960 return this 6961 6962 def _parse_id_var( 6963 self, 6964 any_token: bool = True, 6965 tokens: t.Optional[t.Collection[TokenType]] = None, 6966 ) -> t.Optional[exp.Expression]: 6967 expression = self._parse_identifier() 6968 if not expression and ( 6969 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6970 ): 6971 quoted = self._prev.token_type == TokenType.STRING 6972 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6973 6974 return expression 6975 6976 def _parse_string(self) -> t.Optional[exp.Expression]: 6977 if self._match_set(self.STRING_PARSERS): 6978 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6979 return self._parse_placeholder() 6980 6981 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6982 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6983 6984 def _parse_number(self) -> t.Optional[exp.Expression]: 6985 if self._match_set(self.NUMERIC_PARSERS): 6986 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6987 return self._parse_placeholder() 6988 6989 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6990 if self._match(TokenType.IDENTIFIER): 6991 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6992 return self._parse_placeholder() 6993 6994 def _parse_var( 6995 self, 6996 any_token: bool = False, 6997 tokens: t.Optional[t.Collection[TokenType]] = None, 6998 upper: bool = False, 6999 ) -> t.Optional[exp.Expression]: 7000 if ( 7001 (any_token and self._advance_any()) 7002 or self._match(TokenType.VAR) 7003 or (self._match_set(tokens) if tokens else False) 7004 ): 7005 return self.expression( 7006 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7007 ) 7008 return self._parse_placeholder() 7009 7010 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 7011 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7012 self._advance() 7013 return self._prev 7014 return None 7015 7016 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7017 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7018 7019 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7020 return self._parse_primary() or self._parse_var(any_token=True) 7021 7022 def _parse_null(self) -> t.Optional[exp.Expression]: 7023 if self._match_set(self.NULL_TOKENS): 7024 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7025 return self._parse_placeholder() 7026 7027 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7028 if self._match(TokenType.TRUE): 7029 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7030 if self._match(TokenType.FALSE): 7031 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7032 return self._parse_placeholder() 7033 7034 def _parse_star(self) -> t.Optional[exp.Expression]: 7035 if self._match(TokenType.STAR): 7036 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7037 return self._parse_placeholder() 7038 7039 def _parse_parameter(self) -> exp.Parameter: 7040 this = self._parse_identifier() or self._parse_primary_or_var() 7041 return self.expression(exp.Parameter, this=this) 7042 7043 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7044 if self._match_set(self.PLACEHOLDER_PARSERS): 7045 placeholder = 
self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7046 if placeholder: 7047 return placeholder 7048 self._advance(-1) 7049 return None 7050 7051 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7052 if not self._match_texts(keywords): 7053 return None 7054 if self._match(TokenType.L_PAREN, advance=False): 7055 return self._parse_wrapped_csv(self._parse_expression) 7056 7057 expression = self._parse_expression() 7058 return [expression] if expression else None 7059 7060 def _parse_csv( 7061 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7062 ) -> t.List[exp.Expression]: 7063 parse_result = parse_method() 7064 items = [parse_result] if parse_result is not None else [] 7065 7066 while self._match(sep): 7067 self._add_comments(parse_result) 7068 parse_result = parse_method() 7069 if parse_result is not None: 7070 items.append(parse_result) 7071 7072 return items 7073 7074 def _parse_tokens( 7075 self, parse_method: t.Callable, expressions: t.Dict 7076 ) -> t.Optional[exp.Expression]: 7077 this = parse_method() 7078 7079 while self._match_set(expressions): 7080 this = self.expression( 7081 expressions[self._prev.token_type], 7082 this=this, 7083 comments=self._prev_comments, 7084 expression=parse_method(), 7085 ) 7086 7087 return this 7088 7089 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7090 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7091 7092 def _parse_wrapped_csv( 7093 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 7094 ) -> t.List[exp.Expression]: 7095 return self._parse_wrapped( 7096 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7097 ) 7098 7099 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7100 wrapped = self._match(TokenType.L_PAREN) 7101 if not wrapped and not optional: 7102 self.raise_error("Expecting (") 7103 parse_result = parse_method() 7104 if wrapped: 7105 self._match_r_paren() 7106 return parse_result 7107 7108 def _parse_expressions(self) -> t.List[exp.Expression]: 7109 return self._parse_csv(self._parse_expression) 7110 7111 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7112 return self._parse_select() or self._parse_set_operations( 7113 self._parse_alias(self._parse_assignment(), explicit=True) 7114 if alias 7115 else self._parse_assignment() 7116 ) 7117 7118 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7119 return self._parse_query_modifiers( 7120 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7121 ) 7122 7123 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7124 this = None 7125 if self._match_texts(self.TRANSACTION_KIND): 7126 this = self._prev.text 7127 7128 self._match_texts(("TRANSACTION", "WORK")) 7129 7130 modes = [] 7131 while True: 7132 mode = [] 7133 while self._match(TokenType.VAR): 7134 mode.append(self._prev.text) 7135 7136 if mode: 7137 modes.append(" ".join(mode)) 7138 if not self._match(TokenType.COMMA): 7139 break 7140 7141 return self.expression(exp.Transaction, this=this, modes=modes) 7142 7143 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7144 chain = None 7145 savepoint = None 7146 is_rollback = self._prev.token_type == TokenType.ROLLBACK 7147 7148 self._match_texts(("TRANSACTION", "WORK")) 7149 7150 if self._match_text_seq("TO"): 7151 self._match_text_seq("SAVEPOINT") 7152 savepoint = self._parse_id_var() 
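            # e.g. ROLLBACK [WORK] TO [SAVEPOINT] savepoint_name (illustrative)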
7153 7154 if self._match(TokenType.AND): 7155 chain = not self._match_text_seq("NO") 7156 self._match_text_seq("CHAIN") 7157 7158 if is_rollback: 7159 return self.expression(exp.Rollback, savepoint=savepoint) 7160 7161 return self.expression(exp.Commit, chain=chain) 7162 7163 def _parse_refresh(self) -> exp.Refresh: 7164 self._match(TokenType.TABLE) 7165 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7166 7167 def _parse_add_column(self) -> t.Optional[exp.Expression]: 7168 if not self._match_text_seq("ADD"): 7169 return None 7170 7171 self._match(TokenType.COLUMN) 7172 exists_column = self._parse_exists(not_=True) 7173 expression = self._parse_field_def() 7174 7175 if expression: 7176 expression.set("exists", exists_column) 7177 7178 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7179 if self._match_texts(("FIRST", "AFTER")): 7180 position = self._prev.text 7181 column_position = self.expression( 7182 exp.ColumnPosition, this=self._parse_column(), position=position 7183 ) 7184 expression.set("position", column_position) 7185 7186 return expression 7187 7188 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7189 drop = self._match(TokenType.DROP) and self._parse_drop() 7190 if drop and not isinstance(drop, exp.Command): 7191 drop.set("kind", drop.args.get("kind", "COLUMN")) 7192 return drop 7193 7194 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7195 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7196 return self.expression( 7197 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7198 ) 7199 7200 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7201 index = self._index - 1 7202 7203 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7204 return self._parse_csv( 7205 lambda: self.expression( 7206 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7207 ) 7208 ) 7209 7210 self._retreat(index) 7211 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 7212 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 7213 7214 if self._match_text_seq("ADD", "COLUMNS"): 7215 schema = self._parse_schema() 7216 if schema: 7217 return [schema] 7218 return [] 7219 7220 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 7221 7222 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7223 if self._match_texts(self.ALTER_ALTER_PARSERS): 7224 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7225 7226 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7227 # keyword after ALTER we default to parsing this statement 7228 self._match(TokenType.COLUMN) 7229 column = self._parse_field(any_token=True) 7230 7231 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7232 return self.expression(exp.AlterColumn, this=column, drop=True) 7233 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7234 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7235 if self._match(TokenType.COMMENT): 7236 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7237 if self._match_text_seq("DROP", "NOT", "NULL"): 7238 return self.expression( 7239 exp.AlterColumn, 7240 this=column, 7241 drop=True, 7242 allow_null=True, 7243 ) 7244 if self._match_text_seq("SET", "NOT", "NULL"): 7245 return self.expression( 7246 
exp.AlterColumn, 7247 this=column, 7248 allow_null=False, 7249 ) 7250 7251 if self._match_text_seq("SET", "VISIBLE"): 7252 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7253 if self._match_text_seq("SET", "INVISIBLE"): 7254 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7255 7256 self._match_text_seq("SET", "DATA") 7257 self._match_text_seq("TYPE") 7258 return self.expression( 7259 exp.AlterColumn, 7260 this=column, 7261 dtype=self._parse_types(), 7262 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7263 using=self._match(TokenType.USING) and self._parse_assignment(), 7264 ) 7265 7266 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7267 if self._match_texts(("ALL", "EVEN", "AUTO")): 7268 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7269 7270 self._match_text_seq("KEY", "DISTKEY") 7271 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7272 7273 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7274 if compound: 7275 self._match_text_seq("SORTKEY") 7276 7277 if self._match(TokenType.L_PAREN, advance=False): 7278 return self.expression( 7279 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7280 ) 7281 7282 self._match_texts(("AUTO", "NONE")) 7283 return self.expression( 7284 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7285 ) 7286 7287 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7288 index = self._index - 1 7289 7290 partition_exists = self._parse_exists() 7291 if self._match(TokenType.PARTITION, advance=False): 7292 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7293 7294 self._retreat(index) 7295 return self._parse_csv(self._parse_drop_column) 7296 7297 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7298 if self._match(TokenType.COLUMN): 7299 exists = self._parse_exists() 7300 old_column = self._parse_column() 7301 to = self._match_text_seq("TO") 7302 new_column = self._parse_column() 7303 7304 if old_column is None or to is None or new_column is None: 7305 return None 7306 7307 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7308 7309 self._match_text_seq("TO") 7310 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7311 7312 def _parse_alter_table_set(self) -> exp.AlterSet: 7313 alter_set = self.expression(exp.AlterSet) 7314 7315 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7316 "TABLE", "PROPERTIES" 7317 ): 7318 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7319 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7320 alter_set.set("expressions", [self._parse_assignment()]) 7321 elif self._match_texts(("LOGGED", "UNLOGGED")): 7322 alter_set.set("option", exp.var(self._prev.text.upper())) 7323 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7324 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7325 elif self._match_text_seq("LOCATION"): 7326 alter_set.set("location", self._parse_field()) 7327 elif self._match_text_seq("ACCESS", "METHOD"): 7328 alter_set.set("access_method", self._parse_field()) 7329 elif self._match_text_seq("TABLESPACE"): 7330 alter_set.set("tablespace", self._parse_field()) 7331 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7332 
alter_set.set("file_format", [self._parse_field()]) 7333 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7334 alter_set.set("file_format", self._parse_wrapped_options()) 7335 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7336 alter_set.set("copy_options", self._parse_wrapped_options()) 7337 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7338 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7339 else: 7340 if self._match_text_seq("SERDE"): 7341 alter_set.set("serde", self._parse_field()) 7342 7343 alter_set.set("expressions", [self._parse_properties()]) 7344 7345 return alter_set 7346 7347 def _parse_alter(self) -> exp.Alter | exp.Command: 7348 start = self._prev 7349 7350 alter_token = self._match_set(self.ALTERABLES) and self._prev 7351 if not alter_token: 7352 return self._parse_as_command(start) 7353 7354 exists = self._parse_exists() 7355 only = self._match_text_seq("ONLY") 7356 this = self._parse_table(schema=True) 7357 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7358 7359 if self._next: 7360 self._advance() 7361 7362 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7363 if parser: 7364 actions = ensure_list(parser(self)) 7365 not_valid = self._match_text_seq("NOT", "VALID") 7366 options = self._parse_csv(self._parse_property) 7367 7368 if not self._curr and actions: 7369 return self.expression( 7370 exp.Alter, 7371 this=this, 7372 kind=alter_token.text.upper(), 7373 exists=exists, 7374 actions=actions, 7375 only=only, 7376 options=options, 7377 cluster=cluster, 7378 not_valid=not_valid, 7379 ) 7380 7381 return self._parse_as_command(start) 7382 7383 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7384 start = self._prev 7385 # https://duckdb.org/docs/sql/statements/analyze 7386 if not self._curr: 7387 return self.expression(exp.Analyze) 7388 7389 options = [] 7390 while self._match_texts(self.ANALYZE_STYLES): 7391 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7392 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7393 else: 7394 options.append(self._prev.text.upper()) 7395 7396 this: t.Optional[exp.Expression] = None 7397 inner_expression: t.Optional[exp.Expression] = None 7398 7399 kind = self._curr and self._curr.text.upper() 7400 7401 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7402 this = self._parse_table_parts() 7403 elif self._match_text_seq("TABLES"): 7404 if self._match_set((TokenType.FROM, TokenType.IN)): 7405 kind = f"{kind} {self._prev.text.upper()}" 7406 this = self._parse_table(schema=True, is_db_reference=True) 7407 elif self._match_text_seq("DATABASE"): 7408 this = self._parse_table(schema=True, is_db_reference=True) 7409 elif self._match_text_seq("CLUSTER"): 7410 this = self._parse_table() 7411 # Try matching inner expr keywords before fallback to parse table. 
7412 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7413 kind = None 7414 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7415 else: 7416 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7417 kind = None 7418 this = self._parse_table_parts() 7419 7420 partition = self._try_parse(self._parse_partition) 7421 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7422 return self._parse_as_command(start) 7423 7424 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7425 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7426 "WITH", "ASYNC", "MODE" 7427 ): 7428 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 7429 else: 7430 mode = None 7431 7432 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7433 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7434 7435 properties = self._parse_properties() 7436 return self.expression( 7437 exp.Analyze, 7438 kind=kind, 7439 this=this, 7440 mode=mode, 7441 partition=partition, 7442 properties=properties, 7443 expression=inner_expression, 7444 options=options, 7445 ) 7446 7447 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7448 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7449 this = None 7450 kind = self._prev.text.upper() 7451 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7452 expressions = [] 7453 7454 if not self._match_text_seq("STATISTICS"): 7455 self.raise_error("Expecting token STATISTICS") 7456 7457 if self._match_text_seq("NOSCAN"): 7458 this = "NOSCAN" 7459 elif self._match(TokenType.FOR): 7460 if self._match_text_seq("ALL", "COLUMNS"): 7461 this = "FOR ALL COLUMNS" 7462 if self._match_texts(("COLUMNS",)): 7463 this = "FOR COLUMNS" 7464 expressions = self._parse_csv(self._parse_column_reference) 7465 elif self._match_text_seq("SAMPLE"): 7466 sample = self._parse_number() 7467 expressions = [ 7468 self.expression( 7469 exp.AnalyzeSample, 7470 sample=sample, 7471 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7472 ) 7473 ] 7474 7475 return self.expression( 7476 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7477 ) 7478 7479 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7480 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7481 kind = None 7482 this = None 7483 expression: t.Optional[exp.Expression] = None 7484 if self._match_text_seq("REF", "UPDATE"): 7485 kind = "REF" 7486 this = "UPDATE" 7487 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7488 this = "UPDATE SET DANGLING TO NULL" 7489 elif self._match_text_seq("STRUCTURE"): 7490 kind = "STRUCTURE" 7491 if self._match_text_seq("CASCADE", "FAST"): 7492 this = "CASCADE FAST" 7493 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7494 ("ONLINE", "OFFLINE") 7495 ): 7496 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7497 expression = self._parse_into() 7498 7499 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7500 7501 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7502 this = self._prev.text.upper() 7503 if self._match_text_seq("COLUMNS"): 7504 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7505 return None 7506 7507 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7508 kind =
self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7509 if self._match_text_seq("STATISTICS"): 7510 return self.expression(exp.AnalyzeDelete, kind=kind) 7511 return None 7512 7513 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7514 if self._match_text_seq("CHAINED", "ROWS"): 7515 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7516 return None 7517 7518 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7519 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7520 this = self._prev.text.upper() 7521 expression: t.Optional[exp.Expression] = None 7522 expressions = [] 7523 update_options = None 7524 7525 if self._match_text_seq("HISTOGRAM", "ON"): 7526 expressions = self._parse_csv(self._parse_column_reference) 7527 with_expressions = [] 7528 while self._match(TokenType.WITH): 7529 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7530 if self._match_texts(("SYNC", "ASYNC")): 7531 if self._match_text_seq("MODE", advance=False): 7532 with_expressions.append(f"{self._prev.text.upper()} MODE") 7533 self._advance() 7534 else: 7535 buckets = self._parse_number() 7536 if self._match_text_seq("BUCKETS"): 7537 with_expressions.append(f"{buckets} BUCKETS") 7538 if with_expressions: 7539 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7540 7541 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7542 TokenType.UPDATE, advance=False 7543 ): 7544 update_options = self._prev.text.upper() 7545 self._advance() 7546 elif self._match_text_seq("USING", "DATA"): 7547 expression = self.expression(exp.UsingData, this=self._parse_string()) 7548 7549 return self.expression( 7550 exp.AnalyzeHistogram, 7551 this=this, 7552 expressions=expressions, 7553 expression=expression, 7554 update_options=update_options, 7555 ) 7556 7557 def _parse_merge(self) -> exp.Merge: 7558 self._match(TokenType.INTO) 7559 target = self._parse_table() 7560 7561 if target and self._match(TokenType.ALIAS, advance=False): 7562 target.set("alias", self._parse_table_alias()) 7563 7564 self._match(TokenType.USING) 7565 using = self._parse_table() 7566 7567 self._match(TokenType.ON) 7568 on = self._parse_assignment() 7569 7570 return self.expression( 7571 exp.Merge, 7572 this=target, 7573 using=using, 7574 on=on, 7575 whens=self._parse_when_matched(), 7576 returning=self._parse_returning(), 7577 ) 7578 7579 def _parse_when_matched(self) -> exp.Whens: 7580 whens = [] 7581 7582 while self._match(TokenType.WHEN): 7583 matched = not self._match(TokenType.NOT) 7584 self._match_text_seq("MATCHED") 7585 source = ( 7586 False 7587 if self._match_text_seq("BY", "TARGET") 7588 else self._match_text_seq("BY", "SOURCE") 7589 ) 7590 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7591 7592 self._match(TokenType.THEN) 7593 7594 if self._match(TokenType.INSERT): 7595 this = self._parse_star() 7596 if this: 7597 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7598 else: 7599 then = self.expression( 7600 exp.Insert, 7601 this=exp.var("ROW") 7602 if self._match_text_seq("ROW") 7603 else self._parse_value(values=False), 7604 expression=self._match_text_seq("VALUES") and self._parse_value(), 7605 ) 7606 elif self._match(TokenType.UPDATE): 7607 expressions = self._parse_star() 7608 if expressions: 7609 then = self.expression(exp.Update, expressions=expressions) 7610 else: 7611 then = self.expression( 7612 exp.Update, 7613 
expressions=self._match(TokenType.SET) 7614 and self._parse_csv(self._parse_equality), 7615 ) 7616 elif self._match(TokenType.DELETE): 7617 then = self.expression(exp.Var, this=self._prev.text) 7618 else: 7619 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7620 7621 whens.append( 7622 self.expression( 7623 exp.When, 7624 matched=matched, 7625 source=source, 7626 condition=condition, 7627 then=then, 7628 ) 7629 ) 7630 return self.expression(exp.Whens, expressions=whens) 7631 7632 def _parse_show(self) -> t.Optional[exp.Expression]: 7633 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7634 if parser: 7635 return parser(self) 7636 return self._parse_as_command(self._prev) 7637 7638 def _parse_set_item_assignment( 7639 self, kind: t.Optional[str] = None 7640 ) -> t.Optional[exp.Expression]: 7641 index = self._index 7642 7643 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7644 return self._parse_set_transaction(global_=kind == "GLOBAL") 7645 7646 left = self._parse_primary() or self._parse_column() 7647 assignment_delimiter = self._match_texts(("=", "TO")) 7648 7649 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7650 self._retreat(index) 7651 return None 7652 7653 right = self._parse_statement() or self._parse_id_var() 7654 if isinstance(right, (exp.Column, exp.Identifier)): 7655 right = exp.var(right.name) 7656 7657 this = self.expression(exp.EQ, this=left, expression=right) 7658 return self.expression(exp.SetItem, this=this, kind=kind) 7659 7660 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7661 self._match_text_seq("TRANSACTION") 7662 characteristics = self._parse_csv( 7663 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7664 ) 7665 return self.expression( 7666 exp.SetItem, 7667 expressions=characteristics, 7668 kind="TRANSACTION", 7669 **{"global": global_}, # type: ignore 7670 ) 7671 7672 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7673 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7674 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7675 7676 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7677 index = self._index 7678 set_ = self.expression( 7679 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7680 ) 7681 7682 if self._curr: 7683 self._retreat(index) 7684 return self._parse_as_command(self._prev) 7685 7686 return set_ 7687 7688 def _parse_var_from_options( 7689 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7690 ) -> t.Optional[exp.Var]: 7691 start = self._curr 7692 if not start: 7693 return None 7694 7695 option = start.text.upper() 7696 continuations = options.get(option) 7697 7698 index = self._index 7699 self._advance() 7700 for keywords in continuations or []: 7701 if isinstance(keywords, str): 7702 keywords = (keywords,) 7703 7704 if self._match_text_seq(*keywords): 7705 option = f"{option} {' '.join(keywords)}" 7706 break 7707 else: 7708 if continuations or continuations is None: 7709 if raise_unmatched: 7710 self.raise_error(f"Unknown option {option}") 7711 7712 self._retreat(index) 7713 return None 7714 7715 return exp.var(option) 7716 7717 def _parse_as_command(self, start: Token) -> exp.Command: 7718 while self._curr: 7719 self._advance() 7720 text = self._find_sql(start, self._prev) 7721 size = len(start.text) 7722 self._warn_unsupported() 7723 return exp.Command(this=text[:size], 
expression=text[size:]) 7724 7725 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7726 settings = [] 7727 7728 self._match_l_paren() 7729 kind = self._parse_id_var() 7730 7731 if self._match(TokenType.L_PAREN): 7732 while True: 7733 key = self._parse_id_var() 7734 value = self._parse_primary() 7735 if not key and value is None: 7736 break 7737 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7738 self._match(TokenType.R_PAREN) 7739 7740 self._match_r_paren() 7741 7742 return self.expression( 7743 exp.DictProperty, 7744 this=this, 7745 kind=kind.this if kind else None, 7746 settings=settings, 7747 ) 7748 7749 def _parse_dict_range(self, this: str) -> exp.DictRange: 7750 self._match_l_paren() 7751 has_min = self._match_text_seq("MIN") 7752 if has_min: 7753 min = self._parse_var() or self._parse_primary() 7754 self._match_text_seq("MAX") 7755 max = self._parse_var() or self._parse_primary() 7756 else: 7757 max = self._parse_var() or self._parse_primary() 7758 min = exp.Literal.number(0) 7759 self._match_r_paren() 7760 return self.expression(exp.DictRange, this=this, min=min, max=max) 7761 7762 def _parse_comprehension( 7763 self, this: t.Optional[exp.Expression] 7764 ) -> t.Optional[exp.Comprehension]: 7765 index = self._index 7766 expression = self._parse_column() 7767 if not self._match(TokenType.IN): 7768 self._retreat(index - 1) 7769 return None 7770 iterator = self._parse_column() 7771 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7772 return self.expression( 7773 exp.Comprehension, 7774 this=this, 7775 expression=expression, 7776 iterator=iterator, 7777 condition=condition, 7778 ) 7779 7780 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7781 if self._match(TokenType.HEREDOC_STRING): 7782 return self.expression(exp.Heredoc, this=self._prev.text) 7783 7784 if not self._match_text_seq("$"): 7785 return None 7786 7787 tags = ["$"] 7788 tag_text = None 7789 7790 if self._is_connected(): 7791 self._advance() 7792 tags.append(self._prev.text.upper()) 7793 else: 7794 self.raise_error("No closing $ found") 7795 7796 if tags[-1] != "$": 7797 if self._is_connected() and self._match_text_seq("$"): 7798 tag_text = tags[-1] 7799 tags.append("$") 7800 else: 7801 self.raise_error("No closing $ found") 7802 7803 heredoc_start = self._curr 7804 7805 while self._curr: 7806 if self._match_text_seq(*tags, advance=False): 7807 this = self._find_sql(heredoc_start, self._prev) 7808 self._advance(len(tags)) 7809 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7810 7811 self._advance() 7812 7813 self.raise_error(f"No closing {''.join(tags)} found") 7814 return None 7815 7816 def _find_parser( 7817 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7818 ) -> t.Optional[t.Callable]: 7819 if not self._curr: 7820 return None 7821 7822 index = self._index 7823 this = [] 7824 while True: 7825 # The current token might be multiple words 7826 curr = self._curr.text.upper() 7827 key = curr.split(" ") 7828 this.append(curr) 7829 7830 self._advance() 7831 result, trie = in_trie(trie, key) 7832 if result == TrieResult.FAILED: 7833 break 7834 7835 if result == TrieResult.EXISTS: 7836 subparser = parsers[" ".join(this)] 7837 return subparser 7838 7839 self._retreat(index) 7840 return None 7841 7842 def _match(self, token_type, advance=True, expression=None): 7843 if not self._curr: 7844 return None 7845 7846 if self._curr.token_type == token_type: 7847 if advance: 7848 self._advance() 7849 self._add_comments(expression) 7850 return 
True 7851 7852 return None 7853 7854 def _match_set(self, types, advance=True): 7855 if not self._curr: 7856 return None 7857 7858 if self._curr.token_type in types: 7859 if advance: 7860 self._advance() 7861 return True 7862 7863 return None 7864 7865 def _match_pair(self, token_type_a, token_type_b, advance=True): 7866 if not self._curr or not self._next: 7867 return None 7868 7869 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7870 if advance: 7871 self._advance(2) 7872 return True 7873 7874 return None 7875 7876 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7877 if not self._match(TokenType.L_PAREN, expression=expression): 7878 self.raise_error("Expecting (") 7879 7880 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7881 if not self._match(TokenType.R_PAREN, expression=expression): 7882 self.raise_error("Expecting )") 7883 7884 def _match_texts(self, texts, advance=True): 7885 if ( 7886 self._curr 7887 and self._curr.token_type != TokenType.STRING 7888 and self._curr.text.upper() in texts 7889 ): 7890 if advance: 7891 self._advance() 7892 return True 7893 return None 7894 7895 def _match_text_seq(self, *texts, advance=True): 7896 index = self._index 7897 for text in texts: 7898 if ( 7899 self._curr 7900 and self._curr.token_type != TokenType.STRING 7901 and self._curr.text.upper() == text 7902 ): 7903 self._advance() 7904 else: 7905 self._retreat(index) 7906 return None 7907 7908 if not advance: 7909 self._retreat(index) 7910 7911 return True 7912 7913 def _replace_lambda( 7914 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7915 ) -> t.Optional[exp.Expression]: 7916 if not node: 7917 return node 7918 7919 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7920 7921 for column in node.find_all(exp.Column): 7922 typ = lambda_types.get(column.parts[0].name) 7923 if typ is not None: 7924 dot_or_id = column.to_dot() if column.table else column.this 7925 7926 if typ: 7927 dot_or_id = self.expression( 7928 exp.Cast, 7929 this=dot_or_id, 7930 to=typ, 7931 ) 7932 7933 parent = column.parent 7934 7935 while isinstance(parent, exp.Dot): 7936 if not isinstance(parent.parent, exp.Dot): 7937 parent.replace(dot_or_id) 7938 break 7939 parent = parent.parent 7940 else: 7941 if column is node: 7942 node = dot_or_id 7943 else: 7944 column.replace(dot_or_id) 7945 return node 7946 7947 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7948 start = self._prev 7949 7950 # Not to be confused with TRUNCATE(number, decimals) function call 7951 if self._match(TokenType.L_PAREN): 7952 self._retreat(self._index - 2) 7953 return self._parse_function() 7954 7955 # Clickhouse supports TRUNCATE DATABASE as well 7956 is_database = self._match(TokenType.DATABASE) 7957 7958 self._match(TokenType.TABLE) 7959 7960 exists = self._parse_exists(not_=False) 7961 7962 expressions = self._parse_csv( 7963 lambda: self._parse_table(schema=True, is_db_reference=is_database) 7964 ) 7965 7966 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7967 7968 if self._match_text_seq("RESTART", "IDENTITY"): 7969 identity = "RESTART" 7970 elif self._match_text_seq("CONTINUE", "IDENTITY"): 7971 identity = "CONTINUE" 7972 else: 7973 identity = None 7974 7975 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7976 option = self._prev.text 7977 else: 7978 option = None 7979 7980 partition = self._parse_partition() 
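        # e.g. TRUNCATE TABLE IF EXISTS t1, t2 RESTART IDENTITY CASCADE (illustrative; Postgres-style options)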
7981 7982 # Fallback case 7983 if self._curr: 7984 return self._parse_as_command(start) 7985 7986 return self.expression( 7987 exp.TruncateTable, 7988 expressions=expressions, 7989 is_database=is_database, 7990 exists=exists, 7991 cluster=cluster, 7992 identity=identity, 7993 option=option, 7994 partition=partition, 7995 ) 7996 7997 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7998 this = self._parse_ordered(self._parse_opclass) 7999 8000 if not self._match(TokenType.WITH): 8001 return this 8002 8003 op = self._parse_var(any_token=True) 8004 8005 return self.expression(exp.WithOperator, this=this, op=op) 8006 8007 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8008 self._match(TokenType.EQ) 8009 self._match(TokenType.L_PAREN) 8010 8011 opts: t.List[t.Optional[exp.Expression]] = [] 8012 option: exp.Expression | None 8013 while self._curr and not self._match(TokenType.R_PAREN): 8014 if self._match_text_seq("FORMAT_NAME", "="): 8015 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8016 option = self._parse_format_name() 8017 else: 8018 option = self._parse_property() 8019 8020 if option is None: 8021 self.raise_error("Unable to parse option") 8022 break 8023 8024 opts.append(option) 8025 8026 return opts 8027 8028 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8029 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8030 8031 options = [] 8032 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8033 option = self._parse_var(any_token=True) 8034 prev = self._prev.text.upper() 8035 8036 # Different dialects might separate options and values by white space, "=" and "AS" 8037 self._match(TokenType.EQ) 8038 self._match(TokenType.ALIAS) 8039 8040 param = self.expression(exp.CopyParameter, this=option) 8041 8042 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8043 TokenType.L_PAREN, advance=False 8044 ): 8045 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8046 param.set("expressions", self._parse_wrapped_options()) 8047 elif prev == "FILE_FORMAT": 8048 # T-SQL's external file format case 8049 param.set("expression", self._parse_field()) 8050 else: 8051 param.set("expression", self._parse_unquoted_field()) 8052 8053 options.append(param) 8054 self._match(sep) 8055 8056 return options 8057 8058 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8059 expr = self.expression(exp.Credentials) 8060 8061 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8062 expr.set("storage", self._parse_field()) 8063 if self._match_text_seq("CREDENTIALS"): 8064 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8065 creds = ( 8066 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8067 ) 8068 expr.set("credentials", creds) 8069 if self._match_text_seq("ENCRYPTION"): 8070 expr.set("encryption", self._parse_wrapped_options()) 8071 if self._match_text_seq("IAM_ROLE"): 8072 expr.set("iam_role", self._parse_field()) 8073 if self._match_text_seq("REGION"): 8074 expr.set("region", self._parse_field()) 8075 8076 return expr 8077 8078 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8079 return self._parse_field() 8080 8081 def _parse_copy(self) -> exp.Copy | exp.Command: 8082 start = self._prev 8083 8084 self._match(TokenType.INTO) 8085 8086 this = ( 8087 self._parse_select(nested=True, parse_subquery_alias=False) 8088 if self._match(TokenType.L_PAREN, advance=False) 8089 else self._parse_table(schema=True) 
8090 ) 8091 8092 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8093 8094 files = self._parse_csv(self._parse_file_location) 8095 credentials = self._parse_credentials() 8096 8097 self._match_text_seq("WITH") 8098 8099 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8100 8101 # Fallback case 8102 if self._curr: 8103 return self._parse_as_command(start) 8104 8105 return self.expression( 8106 exp.Copy, 8107 this=this, 8108 kind=kind, 8109 credentials=credentials, 8110 files=files, 8111 params=params, 8112 ) 8113 8114 def _parse_normalize(self) -> exp.Normalize: 8115 return self.expression( 8116 exp.Normalize, 8117 this=self._parse_bitwise(), 8118 form=self._match(TokenType.COMMA) and self._parse_var(), 8119 ) 8120 8121 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8122 args = self._parse_csv(lambda: self._parse_lambda()) 8123 8124 this = seq_get(args, 0) 8125 decimals = seq_get(args, 1) 8126 8127 return expr_type( 8128 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8129 ) 8130 8131 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8132 if self._match_text_seq("COLUMNS", "(", advance=False): 8133 this = self._parse_function() 8134 if isinstance(this, exp.Columns): 8135 this.set("unpack", True) 8136 return this 8137 8138 return self.expression( 8139 exp.Star, 8140 **{ # type: ignore 8141 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8142 "replace": self._parse_star_op("REPLACE"), 8143 "rename": self._parse_star_op("RENAME"), 8144 }, 8145 ) 8146 8147 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8148 privilege_parts = [] 8149 8150 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8151 # (end of privilege list) or L_PAREN (start of column list) are met 8152 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8153 privilege_parts.append(self._curr.text.upper()) 8154 self._advance() 8155 8156 this = exp.var(" ".join(privilege_parts)) 8157 expressions = ( 8158 self._parse_wrapped_csv(self._parse_column) 8159 if self._match(TokenType.L_PAREN, advance=False) 8160 else None 8161 ) 8162 8163 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8164 8165 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8166 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8167 principal = self._parse_id_var() 8168 8169 if not principal: 8170 return None 8171 8172 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8173 8174 def _parse_grant(self) -> exp.Grant | exp.Command: 8175 start = self._prev 8176 8177 privileges = self._parse_csv(self._parse_grant_privilege) 8178 8179 self._match(TokenType.ON) 8180 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8181 8182 # Attempt to parse the securable e.g. 
MySQL allows names 8183 # such as "foo.*", "*.*" which are not easily parseable yet 8184 securable = self._try_parse(self._parse_table_parts) 8185 8186 if not securable or not self._match_text_seq("TO"): 8187 return self._parse_as_command(start) 8188 8189 principals = self._parse_csv(self._parse_grant_principal) 8190 8191 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8192 8193 if self._curr: 8194 return self._parse_as_command(start) 8195 8196 return self.expression( 8197 exp.Grant, 8198 privileges=privileges, 8199 kind=kind, 8200 securable=securable, 8201 principals=principals, 8202 grant_option=grant_option, 8203 ) 8204 8205 def _parse_overlay(self) -> exp.Overlay: 8206 return self.expression( 8207 exp.Overlay, 8208 **{ # type: ignore 8209 "this": self._parse_bitwise(), 8210 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8211 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8212 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8213 }, 8214 ) 8215 8216 def _parse_format_name(self) -> exp.Property: 8217 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8218 # for FILE_FORMAT = <format_name> 8219 return self.expression( 8220 exp.Property, 8221 this=exp.var("FORMAT_NAME"), 8222 value=self._parse_string() or self._parse_table_parts(), 8223 )
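A minimal usage sketch (illustrative, not part of the module source): the Parser above is normally driven through a Dialect, which supplies the matching tokenizer and parser settings.

    from sqlglot.dialects.dialect import Dialect

    dialect = Dialect.get_or_raise("duckdb")
    tokens = dialect.tokenize("SELECT a, SUM(b) FROM t GROUP BY a")
    ast = dialect.parser().parse(tokens)[0]
    print(ast.sql(dialect="duckdb"))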
28def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 29 if len(args) == 1 and args[0].is_star: 30 return exp.StarMap(this=args[0]) 31 32 keys = [] 33 values = [] 34 for i in range(0, len(args), 2): 35 keys.append(args[i]) 36 values.append(args[i + 1]) 37 38 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
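A hedged usage sketch for build_var_map: a flat argument list is paired up into keys and values, while a single star argument short-circuits to a StarMap.

    from sqlglot import exp
    from sqlglot.parser import build_var_map

    node = build_var_map(
        [exp.Literal.string("a"), exp.Literal.number(1), exp.Literal.string("b"), exp.Literal.number(2)]
    )
    assert isinstance(node, exp.VarMap)
    assert isinstance(build_var_map([exp.Star()]), exp.StarMap)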
46def binary_range_parser( 47 expr_type: t.Type[exp.Expression], reverse_args: bool = False 48) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 49 def _parse_binary_range( 50 self: Parser, this: t.Optional[exp.Expression] 51 ) -> t.Optional[exp.Expression]: 52 expression = self._parse_bitwise() 53 if reverse_args: 54 this, expression = expression, this 55 return self._parse_escape(self.expression(expr_type, this=this, expression=expression)) 56 57 return _parse_binary_range
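An illustrative sketch of how these callables are used: binary_range_parser builds the entries of Parser.RANGE_PARSERS, so a subclass can wire a range token to its expression type (the GLOB pairing below is just an example).

    from sqlglot import exp, parser
    from sqlglot.tokens import TokenType

    class MyParser(parser.Parser):
        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.GLOB: parser.binary_range_parser(exp.Glob),
        }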
60def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 61 # Default argument order is base, expression 62 this = seq_get(args, 0) 63 expression = seq_get(args, 1) 64 65 if expression: 66 if not dialect.LOG_BASE_FIRST: 67 this, expression = expression, this 68 return exp.Log(this=this, expression=expression) 69 70 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)
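For example (hedged, since these flags are dialect-specific): with default settings the base comes first, and single-argument LOG lowers to Ln when a dialect's parser sets LOG_DEFAULTS_TO_LN, as MySQL's does.

    import sqlglot
    from sqlglot import exp

    assert isinstance(sqlglot.parse_one("LOG(2, 64)"), exp.Log)
    assert isinstance(sqlglot.parse_one("LOG(64)", read="mysql"), exp.Ln)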
90def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 91 def _builder(args: t.List, dialect: Dialect) -> E: 92 expression = expr_type( 93 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 94 ) 95 if len(args) > 2 and expr_type is exp.JSONExtract: 96 expression.set("expressions", args[2:]) 97 98 return expression 99 100 return _builder
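Illustrative usage: the builder normalizes the second argument into a structured JSON path, and for JSON_EXTRACT any further arguments are preserved as extra expressions.

    import sqlglot
    from sqlglot import exp

    node = sqlglot.parse_one("JSON_EXTRACT(doc, '$.name')")
    assert isinstance(node, exp.JSONExtract)
    assert isinstance(node.expression, exp.JSONPath)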
103def build_mod(args: t.List) -> exp.Mod: 104 this = seq_get(args, 0) 105 expression = seq_get(args, 1) 106 107 # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7 108 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 109 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 110 111 return exp.Mod(this=this, expression=expression)
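For instance, the wrapping above keeps precedence intact when MOD is rendered with the % operator:

    import sqlglot

    assert sqlglot.parse_one("MOD(a + 1, 7)").sql() == "(a + 1) % 7"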
123def build_array_constructor( 124 exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect 125) -> exp.Expression: 126 array_exp = exp_class(expressions=args) 127 128 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 129 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 130 131 return array_exp
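A hedged illustration: for dialects where ARRAY[...] and ARRAY(...) are distinct constructors, the bracket_notation flag records which syntax was parsed so it can round-trip; either way an exp.Array node is produced.

    import sqlglot
    from sqlglot import exp

    assert isinstance(sqlglot.parse_one("ARRAY[1, 2, 3]", read="postgres"), exp.Array)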
134def build_convert_timezone( 135 args: t.List, default_source_tz: t.Optional[str] = None 136) -> t.Union[exp.ConvertTimezone, exp.Anonymous]: 137 if len(args) == 2: 138 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 139 return exp.ConvertTimezone( 140 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 141 ) 142 143 return exp.ConvertTimezone.from_arg_list(args)
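Illustrative usage, matching the CONVERT_TIMEZONE entry in Parser.FUNCTIONS below: with two arguments the first is the target zone and the second the timestamp, and a dialect-supplied default (if any) fills in the source zone.

    import sqlglot
    from sqlglot import exp

    node = sqlglot.parse_one("CONVERT_TIMEZONE('UTC', ts)")
    assert isinstance(node, exp.ConvertTimezone)
    assert node.args["target_tz"].name == "UTC"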
176class Parser(metaclass=_Parser): 177 """ 178 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 179 180 Args: 181 error_level: The desired error level. 182 Default: ErrorLevel.IMMEDIATE 183 error_message_context: The amount of context to capture from a query string when displaying 184 the error message (in number of characters). 185 Default: 100 186 max_errors: Maximum number of error messages to include in a raised ParseError. 187 This is only relevant if error_level is ErrorLevel.RAISE. 188 Default: 3 189 """ 190 191 FUNCTIONS: t.Dict[str, t.Callable] = { 192 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 193 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 194 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 195 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 196 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 197 ), 198 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 199 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 200 ), 201 "CHAR": lambda args: exp.Chr(expressions=args), 202 "CHR": lambda args: exp.Chr(expressions=args), 203 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 204 "CONCAT": lambda args, dialect: exp.Concat( 205 expressions=args, 206 safe=not dialect.STRICT_STRING_CONCAT, 207 coalesce=dialect.CONCAT_COALESCE, 208 ), 209 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 210 expressions=args, 211 safe=not dialect.STRICT_STRING_CONCAT, 212 coalesce=dialect.CONCAT_COALESCE, 213 ), 214 "CONVERT_TIMEZONE": build_convert_timezone, 215 "DATE_TO_DATE_STR": lambda args: exp.Cast( 216 this=seq_get(args, 0), 217 to=exp.DataType(this=exp.DataType.Type.TEXT), 218 ), 219 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 220 start=seq_get(args, 0), 221 end=seq_get(args, 1), 222 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 223 ), 224 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 225 "HEX": build_hex, 226 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 227 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 228 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 229 "LIKE": build_like, 230 "LOG": build_logarithm, 231 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 232 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 233 "LOWER": build_lower, 234 "LPAD": lambda args: build_pad(args), 235 "LEFTPAD": lambda args: build_pad(args), 236 "LTRIM": lambda args: build_trim(args), 237 "MOD": build_mod, 238 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 239 "RPAD": lambda args: build_pad(args, is_left=False), 240 "RTRIM": lambda args: build_trim(args, is_left=False), 241 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 242 if len(args) != 2 243 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 244 "STRPOS": exp.StrPosition.from_arg_list, 245 "CHARINDEX": lambda args: build_locate_strposition(args), 246 "INSTR": exp.StrPosition.from_arg_list, 247 "LOCATE": lambda args: build_locate_strposition(args), 248 "TIME_TO_TIME_STR": lambda args: exp.Cast( 249 this=seq_get(args, 0), 250 to=exp.DataType(this=exp.DataType.Type.TEXT), 251 ), 252 "TO_HEX": build_hex, 253 "TS_OR_DS_TO_DATE_STR": 
lambda args: exp.Substring( 254 this=exp.Cast( 255 this=seq_get(args, 0), 256 to=exp.DataType(this=exp.DataType.Type.TEXT), 257 ), 258 start=exp.Literal.number(1), 259 length=exp.Literal.number(10), 260 ), 261 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 262 "UPPER": build_upper, 263 "VAR_MAP": build_var_map, 264 } 265 266 NO_PAREN_FUNCTIONS = { 267 TokenType.CURRENT_DATE: exp.CurrentDate, 268 TokenType.CURRENT_DATETIME: exp.CurrentDate, 269 TokenType.CURRENT_TIME: exp.CurrentTime, 270 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 271 TokenType.CURRENT_USER: exp.CurrentUser, 272 } 273 274 STRUCT_TYPE_TOKENS = { 275 TokenType.NESTED, 276 TokenType.OBJECT, 277 TokenType.STRUCT, 278 TokenType.UNION, 279 } 280 281 NESTED_TYPE_TOKENS = { 282 TokenType.ARRAY, 283 TokenType.LIST, 284 TokenType.LOWCARDINALITY, 285 TokenType.MAP, 286 TokenType.NULLABLE, 287 TokenType.RANGE, 288 *STRUCT_TYPE_TOKENS, 289 } 290 291 ENUM_TYPE_TOKENS = { 292 TokenType.DYNAMIC, 293 TokenType.ENUM, 294 TokenType.ENUM8, 295 TokenType.ENUM16, 296 } 297 298 AGGREGATE_TYPE_TOKENS = { 299 TokenType.AGGREGATEFUNCTION, 300 TokenType.SIMPLEAGGREGATEFUNCTION, 301 } 302 303 TYPE_TOKENS = { 304 TokenType.BIT, 305 TokenType.BOOLEAN, 306 TokenType.TINYINT, 307 TokenType.UTINYINT, 308 TokenType.SMALLINT, 309 TokenType.USMALLINT, 310 TokenType.INT, 311 TokenType.UINT, 312 TokenType.BIGINT, 313 TokenType.UBIGINT, 314 TokenType.INT128, 315 TokenType.UINT128, 316 TokenType.INT256, 317 TokenType.UINT256, 318 TokenType.MEDIUMINT, 319 TokenType.UMEDIUMINT, 320 TokenType.FIXEDSTRING, 321 TokenType.FLOAT, 322 TokenType.DOUBLE, 323 TokenType.UDOUBLE, 324 TokenType.CHAR, 325 TokenType.NCHAR, 326 TokenType.VARCHAR, 327 TokenType.NVARCHAR, 328 TokenType.BPCHAR, 329 TokenType.TEXT, 330 TokenType.MEDIUMTEXT, 331 TokenType.LONGTEXT, 332 TokenType.BLOB, 333 TokenType.MEDIUMBLOB, 334 TokenType.LONGBLOB, 335 TokenType.BINARY, 336 TokenType.VARBINARY, 337 TokenType.JSON, 338 TokenType.JSONB, 339 TokenType.INTERVAL, 340 TokenType.TINYBLOB, 341 TokenType.TINYTEXT, 342 TokenType.TIME, 343 TokenType.TIMETZ, 344 TokenType.TIMESTAMP, 345 TokenType.TIMESTAMP_S, 346 TokenType.TIMESTAMP_MS, 347 TokenType.TIMESTAMP_NS, 348 TokenType.TIMESTAMPTZ, 349 TokenType.TIMESTAMPLTZ, 350 TokenType.TIMESTAMPNTZ, 351 TokenType.DATETIME, 352 TokenType.DATETIME2, 353 TokenType.DATETIME64, 354 TokenType.SMALLDATETIME, 355 TokenType.DATE, 356 TokenType.DATE32, 357 TokenType.INT4RANGE, 358 TokenType.INT4MULTIRANGE, 359 TokenType.INT8RANGE, 360 TokenType.INT8MULTIRANGE, 361 TokenType.NUMRANGE, 362 TokenType.NUMMULTIRANGE, 363 TokenType.TSRANGE, 364 TokenType.TSMULTIRANGE, 365 TokenType.TSTZRANGE, 366 TokenType.TSTZMULTIRANGE, 367 TokenType.DATERANGE, 368 TokenType.DATEMULTIRANGE, 369 TokenType.DECIMAL, 370 TokenType.DECIMAL32, 371 TokenType.DECIMAL64, 372 TokenType.DECIMAL128, 373 TokenType.DECIMAL256, 374 TokenType.UDECIMAL, 375 TokenType.BIGDECIMAL, 376 TokenType.UUID, 377 TokenType.GEOGRAPHY, 378 TokenType.GEOMETRY, 379 TokenType.POINT, 380 TokenType.RING, 381 TokenType.LINESTRING, 382 TokenType.MULTILINESTRING, 383 TokenType.POLYGON, 384 TokenType.MULTIPOLYGON, 385 TokenType.HLLSKETCH, 386 TokenType.HSTORE, 387 TokenType.PSEUDO_TYPE, 388 TokenType.SUPER, 389 TokenType.SERIAL, 390 TokenType.SMALLSERIAL, 391 TokenType.BIGSERIAL, 392 TokenType.XML, 393 TokenType.YEAR, 394 TokenType.USERDEFINED, 395 TokenType.MONEY, 396 TokenType.SMALLMONEY, 397 TokenType.ROWVERSION, 398 TokenType.IMAGE, 399 TokenType.VARIANT, 400 TokenType.VECTOR, 401 
TokenType.OBJECT, 402 TokenType.OBJECT_IDENTIFIER, 403 TokenType.INET, 404 TokenType.IPADDRESS, 405 TokenType.IPPREFIX, 406 TokenType.IPV4, 407 TokenType.IPV6, 408 TokenType.UNKNOWN, 409 TokenType.NOTHING, 410 TokenType.NULL, 411 TokenType.NAME, 412 TokenType.TDIGEST, 413 TokenType.DYNAMIC, 414 *ENUM_TYPE_TOKENS, 415 *NESTED_TYPE_TOKENS, 416 *AGGREGATE_TYPE_TOKENS, 417 } 418 419 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 420 TokenType.BIGINT: TokenType.UBIGINT, 421 TokenType.INT: TokenType.UINT, 422 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 423 TokenType.SMALLINT: TokenType.USMALLINT, 424 TokenType.TINYINT: TokenType.UTINYINT, 425 TokenType.DECIMAL: TokenType.UDECIMAL, 426 TokenType.DOUBLE: TokenType.UDOUBLE, 427 } 428 429 SUBQUERY_PREDICATES = { 430 TokenType.ANY: exp.Any, 431 TokenType.ALL: exp.All, 432 TokenType.EXISTS: exp.Exists, 433 TokenType.SOME: exp.Any, 434 } 435 436 RESERVED_TOKENS = { 437 *Tokenizer.SINGLE_TOKENS.values(), 438 TokenType.SELECT, 439 } - {TokenType.IDENTIFIER} 440 441 DB_CREATABLES = { 442 TokenType.DATABASE, 443 TokenType.DICTIONARY, 444 TokenType.FILE_FORMAT, 445 TokenType.MODEL, 446 TokenType.NAMESPACE, 447 TokenType.SCHEMA, 448 TokenType.SEQUENCE, 449 TokenType.SINK, 450 TokenType.SOURCE, 451 TokenType.STAGE, 452 TokenType.STORAGE_INTEGRATION, 453 TokenType.STREAMLIT, 454 TokenType.TABLE, 455 TokenType.TAG, 456 TokenType.VIEW, 457 TokenType.WAREHOUSE, 458 } 459 460 CREATABLES = { 461 TokenType.COLUMN, 462 TokenType.CONSTRAINT, 463 TokenType.FOREIGN_KEY, 464 TokenType.FUNCTION, 465 TokenType.INDEX, 466 TokenType.PROCEDURE, 467 *DB_CREATABLES, 468 } 469 470 ALTERABLES = { 471 TokenType.INDEX, 472 TokenType.TABLE, 473 TokenType.VIEW, 474 } 475 476 # Tokens that can represent identifiers 477 ID_VAR_TOKENS = { 478 TokenType.ALL, 479 TokenType.ATTACH, 480 TokenType.VAR, 481 TokenType.ANTI, 482 TokenType.APPLY, 483 TokenType.ASC, 484 TokenType.ASOF, 485 TokenType.AUTO_INCREMENT, 486 TokenType.BEGIN, 487 TokenType.BPCHAR, 488 TokenType.CACHE, 489 TokenType.CASE, 490 TokenType.COLLATE, 491 TokenType.COMMAND, 492 TokenType.COMMENT, 493 TokenType.COMMIT, 494 TokenType.CONSTRAINT, 495 TokenType.COPY, 496 TokenType.CUBE, 497 TokenType.CURRENT_SCHEMA, 498 TokenType.DEFAULT, 499 TokenType.DELETE, 500 TokenType.DESC, 501 TokenType.DESCRIBE, 502 TokenType.DETACH, 503 TokenType.DICTIONARY, 504 TokenType.DIV, 505 TokenType.END, 506 TokenType.EXECUTE, 507 TokenType.EXPORT, 508 TokenType.ESCAPE, 509 TokenType.FALSE, 510 TokenType.FIRST, 511 TokenType.FILTER, 512 TokenType.FINAL, 513 TokenType.FORMAT, 514 TokenType.FULL, 515 TokenType.IDENTIFIER, 516 TokenType.IS, 517 TokenType.ISNULL, 518 TokenType.INTERVAL, 519 TokenType.KEEP, 520 TokenType.KILL, 521 TokenType.LEFT, 522 TokenType.LIMIT, 523 TokenType.LOAD, 524 TokenType.MERGE, 525 TokenType.NATURAL, 526 TokenType.NEXT, 527 TokenType.OFFSET, 528 TokenType.OPERATOR, 529 TokenType.ORDINALITY, 530 TokenType.OVERLAPS, 531 TokenType.OVERWRITE, 532 TokenType.PARTITION, 533 TokenType.PERCENT, 534 TokenType.PIVOT, 535 TokenType.PRAGMA, 536 TokenType.PUT, 537 TokenType.RANGE, 538 TokenType.RECURSIVE, 539 TokenType.REFERENCES, 540 TokenType.REFRESH, 541 TokenType.RENAME, 542 TokenType.REPLACE, 543 TokenType.RIGHT, 544 TokenType.ROLLUP, 545 TokenType.ROW, 546 TokenType.ROWS, 547 TokenType.SEMI, 548 TokenType.SET, 549 TokenType.SETTINGS, 550 TokenType.SHOW, 551 TokenType.TEMPORARY, 552 TokenType.TOP, 553 TokenType.TRUE, 554 TokenType.TRUNCATE, 555 TokenType.UNIQUE, 556 TokenType.UNNEST, 557 TokenType.UNPIVOT, 558 TokenType.UPDATE, 559 TokenType.USE, 
560 TokenType.VOLATILE, 561 TokenType.WINDOW, 562 *CREATABLES, 563 *SUBQUERY_PREDICATES, 564 *TYPE_TOKENS, 565 *NO_PAREN_FUNCTIONS, 566 } 567 ID_VAR_TOKENS.remove(TokenType.UNION) 568 569 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 570 TokenType.ANTI, 571 TokenType.APPLY, 572 TokenType.ASOF, 573 TokenType.FULL, 574 TokenType.LEFT, 575 TokenType.LOCK, 576 TokenType.NATURAL, 577 TokenType.RIGHT, 578 TokenType.SEMI, 579 TokenType.WINDOW, 580 } 581 582 ALIAS_TOKENS = ID_VAR_TOKENS 583 584 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS 585 586 ARRAY_CONSTRUCTORS = { 587 "ARRAY": exp.Array, 588 "LIST": exp.List, 589 } 590 591 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 592 593 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 594 595 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 596 597 FUNC_TOKENS = { 598 TokenType.COLLATE, 599 TokenType.COMMAND, 600 TokenType.CURRENT_DATE, 601 TokenType.CURRENT_DATETIME, 602 TokenType.CURRENT_SCHEMA, 603 TokenType.CURRENT_TIMESTAMP, 604 TokenType.CURRENT_TIME, 605 TokenType.CURRENT_USER, 606 TokenType.FILTER, 607 TokenType.FIRST, 608 TokenType.FORMAT, 609 TokenType.GLOB, 610 TokenType.IDENTIFIER, 611 TokenType.INDEX, 612 TokenType.ISNULL, 613 TokenType.ILIKE, 614 TokenType.INSERT, 615 TokenType.LIKE, 616 TokenType.MERGE, 617 TokenType.NEXT, 618 TokenType.OFFSET, 619 TokenType.PRIMARY_KEY, 620 TokenType.RANGE, 621 TokenType.REPLACE, 622 TokenType.RLIKE, 623 TokenType.ROW, 624 TokenType.UNNEST, 625 TokenType.VAR, 626 TokenType.LEFT, 627 TokenType.RIGHT, 628 TokenType.SEQUENCE, 629 TokenType.DATE, 630 TokenType.DATETIME, 631 TokenType.TABLE, 632 TokenType.TIMESTAMP, 633 TokenType.TIMESTAMPTZ, 634 TokenType.TRUNCATE, 635 TokenType.WINDOW, 636 TokenType.XOR, 637 *TYPE_TOKENS, 638 *SUBQUERY_PREDICATES, 639 } 640 641 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 642 TokenType.AND: exp.And, 643 } 644 645 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 646 TokenType.COLON_EQ: exp.PropertyEQ, 647 } 648 649 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 650 TokenType.OR: exp.Or, 651 } 652 653 EQUALITY = { 654 TokenType.EQ: exp.EQ, 655 TokenType.NEQ: exp.NEQ, 656 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 657 } 658 659 COMPARISON = { 660 TokenType.GT: exp.GT, 661 TokenType.GTE: exp.GTE, 662 TokenType.LT: exp.LT, 663 TokenType.LTE: exp.LTE, 664 } 665 666 BITWISE = { 667 TokenType.AMP: exp.BitwiseAnd, 668 TokenType.CARET: exp.BitwiseXor, 669 TokenType.PIPE: exp.BitwiseOr, 670 } 671 672 TERM = { 673 TokenType.DASH: exp.Sub, 674 TokenType.PLUS: exp.Add, 675 TokenType.MOD: exp.Mod, 676 TokenType.COLLATE: exp.Collate, 677 } 678 679 FACTOR = { 680 TokenType.DIV: exp.IntDiv, 681 TokenType.LR_ARROW: exp.Distance, 682 TokenType.SLASH: exp.Div, 683 TokenType.STAR: exp.Mul, 684 } 685 686 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 687 688 TIMES = { 689 TokenType.TIME, 690 TokenType.TIMETZ, 691 } 692 693 TIMESTAMPS = { 694 TokenType.TIMESTAMP, 695 TokenType.TIMESTAMPNTZ, 696 TokenType.TIMESTAMPTZ, 697 TokenType.TIMESTAMPLTZ, 698 *TIMES, 699 } 700 701 SET_OPERATIONS = { 702 TokenType.UNION, 703 TokenType.INTERSECT, 704 TokenType.EXCEPT, 705 } 706 707 JOIN_METHODS = { 708 TokenType.ASOF, 709 TokenType.NATURAL, 710 TokenType.POSITIONAL, 711 } 712 713 JOIN_SIDES = { 714 TokenType.LEFT, 715 TokenType.RIGHT, 716 TokenType.FULL, 717 } 718 719 JOIN_KINDS = { 720 TokenType.ANTI, 721 TokenType.CROSS, 722 TokenType.INNER, 723 TokenType.OUTER, 724 TokenType.SEMI, 725 TokenType.STRAIGHT_JOIN, 726 } 727 728 JOIN_HINTS: t.Set[str] = set() 
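    # Example (illustrative sketch, assuming the default dialect): the operator tables above
    # (CONJUNCTION, DISJUNCTION, EQUALITY, COMPARISON, BITWISE, TERM, FACTOR) map operator
    # tokens to expression classes, and each precedence tier is consumed by a _parse_* method
    # that parses its operands with the next-tighter tier, so a FACTOR operator such as *
    # binds tighter than a TERM operator such as +:
    #
    #     >>> import sqlglot
    #     >>> type(sqlglot.parse_one("1 + 2 * 3")).__name__  # the Mul node is nested inside the Add node
    #     'Add'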
729 730 LAMBDAS = { 731 TokenType.ARROW: lambda self, expressions: self.expression( 732 exp.Lambda, 733 this=self._replace_lambda( 734 self._parse_assignment(), 735 expressions, 736 ), 737 expressions=expressions, 738 ), 739 TokenType.FARROW: lambda self, expressions: self.expression( 740 exp.Kwarg, 741 this=exp.var(expressions[0].name), 742 expression=self._parse_assignment(), 743 ), 744 } 745 746 COLUMN_OPERATORS = { 747 TokenType.DOT: None, 748 TokenType.DOTCOLON: lambda self, this, to: self.expression( 749 exp.JSONCast, 750 this=this, 751 to=to, 752 ), 753 TokenType.DCOLON: lambda self, this, to: self.expression( 754 exp.Cast if self.STRICT_CAST else exp.TryCast, 755 this=this, 756 to=to, 757 ), 758 TokenType.ARROW: lambda self, this, path: self.expression( 759 exp.JSONExtract, 760 this=this, 761 expression=self.dialect.to_json_path(path), 762 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 763 ), 764 TokenType.DARROW: lambda self, this, path: self.expression( 765 exp.JSONExtractScalar, 766 this=this, 767 expression=self.dialect.to_json_path(path), 768 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 769 ), 770 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 771 exp.JSONBExtract, 772 this=this, 773 expression=path, 774 ), 775 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 776 exp.JSONBExtractScalar, 777 this=this, 778 expression=path, 779 ), 780 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 781 exp.JSONBContains, 782 this=this, 783 expression=key, 784 ), 785 } 786 787 EXPRESSION_PARSERS = { 788 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 789 exp.Column: lambda self: self._parse_column(), 790 exp.Condition: lambda self: self._parse_assignment(), 791 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 792 exp.Expression: lambda self: self._parse_expression(), 793 exp.From: lambda self: self._parse_from(joins=True), 794 exp.Group: lambda self: self._parse_group(), 795 exp.Having: lambda self: self._parse_having(), 796 exp.Hint: lambda self: self._parse_hint_body(), 797 exp.Identifier: lambda self: self._parse_id_var(), 798 exp.Join: lambda self: self._parse_join(), 799 exp.Lambda: lambda self: self._parse_lambda(), 800 exp.Lateral: lambda self: self._parse_lateral(), 801 exp.Limit: lambda self: self._parse_limit(), 802 exp.Offset: lambda self: self._parse_offset(), 803 exp.Order: lambda self: self._parse_order(), 804 exp.Ordered: lambda self: self._parse_ordered(), 805 exp.Properties: lambda self: self._parse_properties(), 806 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 807 exp.Qualify: lambda self: self._parse_qualify(), 808 exp.Returning: lambda self: self._parse_returning(), 809 exp.Select: lambda self: self._parse_select(), 810 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 811 exp.Table: lambda self: self._parse_table_parts(), 812 exp.TableAlias: lambda self: self._parse_table_alias(), 813 exp.Tuple: lambda self: self._parse_value(values=False), 814 exp.Whens: lambda self: self._parse_when_matched(), 815 exp.Where: lambda self: self._parse_where(), 816 exp.Window: lambda self: self._parse_named_window(), 817 exp.With: lambda self: self._parse_with(), 818 "JOIN_TYPE": lambda self: self._parse_join_parts(), 819 } 820 821 STATEMENT_PARSERS = { 822 TokenType.ALTER: lambda self: self._parse_alter(), 823 TokenType.ANALYZE: lambda self: self._parse_analyze(), 824 TokenType.BEGIN: lambda self: self._parse_transaction(), 
825 TokenType.CACHE: lambda self: self._parse_cache(), 826 TokenType.COMMENT: lambda self: self._parse_comment(), 827 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 828 TokenType.COPY: lambda self: self._parse_copy(), 829 TokenType.CREATE: lambda self: self._parse_create(), 830 TokenType.DELETE: lambda self: self._parse_delete(), 831 TokenType.DESC: lambda self: self._parse_describe(), 832 TokenType.DESCRIBE: lambda self: self._parse_describe(), 833 TokenType.DROP: lambda self: self._parse_drop(), 834 TokenType.GRANT: lambda self: self._parse_grant(), 835 TokenType.INSERT: lambda self: self._parse_insert(), 836 TokenType.KILL: lambda self: self._parse_kill(), 837 TokenType.LOAD: lambda self: self._parse_load(), 838 TokenType.MERGE: lambda self: self._parse_merge(), 839 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 840 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 841 TokenType.REFRESH: lambda self: self._parse_refresh(), 842 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 843 TokenType.SET: lambda self: self._parse_set(), 844 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 845 TokenType.UNCACHE: lambda self: self._parse_uncache(), 846 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 847 TokenType.UPDATE: lambda self: self._parse_update(), 848 TokenType.USE: lambda self: self._parse_use(), 849 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 850 } 851 852 UNARY_PARSERS = { 853 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 854 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 855 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 856 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 857 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 858 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 859 } 860 861 STRING_PARSERS = { 862 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 863 exp.RawString, this=token.text 864 ), 865 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 866 exp.National, this=token.text 867 ), 868 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 869 TokenType.STRING: lambda self, token: self.expression( 870 exp.Literal, this=token.text, is_string=True 871 ), 872 TokenType.UNICODE_STRING: lambda self, token: self.expression( 873 exp.UnicodeString, 874 this=token.text, 875 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 876 ), 877 } 878 879 NUMERIC_PARSERS = { 880 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 881 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 882 TokenType.HEX_STRING: lambda self, token: self.expression( 883 exp.HexString, 884 this=token.text, 885 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 886 ), 887 TokenType.NUMBER: lambda self, token: self.expression( 888 exp.Literal, this=token.text, is_string=False 889 ), 890 } 891 892 PRIMARY_PARSERS = { 893 **STRING_PARSERS, 894 **NUMERIC_PARSERS, 895 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 896 TokenType.NULL: lambda self, _: self.expression(exp.Null), 897 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 898 
TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 899 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 900 TokenType.STAR: lambda self, _: self._parse_star_ops(), 901 } 902 903 PLACEHOLDER_PARSERS = { 904 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 905 TokenType.PARAMETER: lambda self: self._parse_parameter(), 906 TokenType.COLON: lambda self: ( 907 self.expression(exp.Placeholder, this=self._prev.text) 908 if self._match_set(self.COLON_PLACEHOLDER_TOKENS) 909 else None 910 ), 911 } 912 913 RANGE_PARSERS = { 914 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 915 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 916 TokenType.GLOB: binary_range_parser(exp.Glob), 917 TokenType.ILIKE: binary_range_parser(exp.ILike), 918 TokenType.IN: lambda self, this: self._parse_in(this), 919 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 920 TokenType.IS: lambda self, this: self._parse_is(this), 921 TokenType.LIKE: binary_range_parser(exp.Like), 922 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 923 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 924 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 925 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 926 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 927 } 928 929 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 930 "ALLOWED_VALUES": lambda self: self.expression( 931 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 932 ), 933 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 934 "AUTO": lambda self: self._parse_auto_property(), 935 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 936 "BACKUP": lambda self: self.expression( 937 exp.BackupProperty, this=self._parse_var(any_token=True) 938 ), 939 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 940 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 941 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 942 "CHECKSUM": lambda self: self._parse_checksum(), 943 "CLUSTER BY": lambda self: self._parse_cluster(), 944 "CLUSTERED": lambda self: self._parse_clustered_by(), 945 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 946 exp.CollateProperty, **kwargs 947 ), 948 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 949 "CONTAINS": lambda self: self._parse_contains_property(), 950 "COPY": lambda self: self._parse_copy_property(), 951 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 952 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 953 "DEFINER": lambda self: self._parse_definer(), 954 "DETERMINISTIC": lambda self: self.expression( 955 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 956 ), 957 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 958 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 959 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 960 "DISTKEY": lambda self: self._parse_distkey(), 961 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 962 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 963 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 964 "EXECUTE": lambda self: 
self._parse_property_assignment(exp.ExecuteAsProperty), 965 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 966 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 967 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 968 "FREESPACE": lambda self: self._parse_freespace(), 969 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 970 "HEAP": lambda self: self.expression(exp.HeapProperty), 971 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 972 "IMMUTABLE": lambda self: self.expression( 973 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 974 ), 975 "INHERITS": lambda self: self.expression( 976 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 977 ), 978 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 979 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 980 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 981 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 982 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 983 "LIKE": lambda self: self._parse_create_like(), 984 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 985 "LOCK": lambda self: self._parse_locking(), 986 "LOCKING": lambda self: self._parse_locking(), 987 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 988 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 989 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 990 "MODIFIES": lambda self: self._parse_modifies_property(), 991 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 992 "NO": lambda self: self._parse_no_property(), 993 "ON": lambda self: self._parse_on_property(), 994 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 995 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 996 "PARTITION": lambda self: self._parse_partitioned_of(), 997 "PARTITION BY": lambda self: self._parse_partitioned_by(), 998 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 999 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 1000 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 1001 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1002 "READS": lambda self: self._parse_reads_property(), 1003 "REMOTE": lambda self: self._parse_remote_with_connection(), 1004 "RETURNS": lambda self: self._parse_returns(), 1005 "STRICT": lambda self: self.expression(exp.StrictProperty), 1006 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1007 "ROW": lambda self: self._parse_row(), 1008 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1009 "SAMPLE": lambda self: self.expression( 1010 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1011 ), 1012 "SECURE": lambda self: self.expression(exp.SecureProperty), 1013 "SECURITY": lambda self: self._parse_security(), 1014 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1015 "SETTINGS": lambda self: self._parse_settings_property(), 1016 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1017 "SORTKEY": lambda self: self._parse_sortkey(), 1018 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1019 "STABLE": lambda self: self.expression( 1020 exp.StabilityProperty, 
this=exp.Literal.string("STABLE") 1021 ), 1022 "STORED": lambda self: self._parse_stored(), 1023 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1024 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1025 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1026 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1027 "TO": lambda self: self._parse_to_table(), 1028 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1029 "TRANSFORM": lambda self: self.expression( 1030 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1031 ), 1032 "TTL": lambda self: self._parse_ttl(), 1033 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1034 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1035 "VOLATILE": lambda self: self._parse_volatile_property(), 1036 "WITH": lambda self: self._parse_with_property(), 1037 } 1038 1039 CONSTRAINT_PARSERS = { 1040 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1041 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1042 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1043 "CHARACTER SET": lambda self: self.expression( 1044 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1045 ), 1046 "CHECK": lambda self: self.expression( 1047 exp.CheckColumnConstraint, 1048 this=self._parse_wrapped(self._parse_assignment), 1049 enforced=self._match_text_seq("ENFORCED"), 1050 ), 1051 "COLLATE": lambda self: self.expression( 1052 exp.CollateColumnConstraint, 1053 this=self._parse_identifier() or self._parse_column(), 1054 ), 1055 "COMMENT": lambda self: self.expression( 1056 exp.CommentColumnConstraint, this=self._parse_string() 1057 ), 1058 "COMPRESS": lambda self: self._parse_compress(), 1059 "CLUSTERED": lambda self: self.expression( 1060 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1061 ), 1062 "NONCLUSTERED": lambda self: self.expression( 1063 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1064 ), 1065 "DEFAULT": lambda self: self.expression( 1066 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1067 ), 1068 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1069 "EPHEMERAL": lambda self: self.expression( 1070 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1071 ), 1072 "EXCLUDE": lambda self: self.expression( 1073 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1074 ), 1075 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1076 "FORMAT": lambda self: self.expression( 1077 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1078 ), 1079 "GENERATED": lambda self: self._parse_generated_as_identity(), 1080 "IDENTITY": lambda self: self._parse_auto_increment(), 1081 "INLINE": lambda self: self._parse_inline(), 1082 "LIKE": lambda self: self._parse_create_like(), 1083 "NOT": lambda self: self._parse_not_constraint(), 1084 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1085 "ON": lambda self: ( 1086 self._match(TokenType.UPDATE) 1087 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1088 ) 1089 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1090 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1091 "PERIOD": lambda self: self._parse_period_for_system_time(), 1092 
"PRIMARY KEY": lambda self: self._parse_primary_key(), 1093 "REFERENCES": lambda self: self._parse_references(match=False), 1094 "TITLE": lambda self: self.expression( 1095 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1096 ), 1097 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1098 "UNIQUE": lambda self: self._parse_unique(), 1099 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1100 "WATERMARK": lambda self: self.expression( 1101 exp.WatermarkColumnConstraint, 1102 this=self._match(TokenType.FOR) and self._parse_column(), 1103 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1104 ), 1105 "WITH": lambda self: self.expression( 1106 exp.Properties, expressions=self._parse_wrapped_properties() 1107 ), 1108 "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1109 "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1110 } 1111 1112 def _parse_partitioned_by_bucket_or_truncate(self) -> exp.Expression: 1113 klass = ( 1114 exp.PartitionedByBucket 1115 if self._prev.text.upper() == "BUCKET" 1116 else exp.PartitionByTruncate 1117 ) 1118 1119 args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column()) 1120 this, expression = seq_get(args, 0), seq_get(args, 1) 1121 1122 if isinstance(this, exp.Literal): 1123 # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order 1124 # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)` 1125 # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)` 1126 # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)` 1127 # 1128 # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning 1129 # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties 1130 this, expression = expression, this 1131 1132 return self.expression(klass, this=this, expression=expression) 1133 1134 ALTER_PARSERS = { 1135 "ADD": lambda self: self._parse_alter_table_add(), 1136 "AS": lambda self: self._parse_select(), 1137 "ALTER": lambda self: self._parse_alter_table_alter(), 1138 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1139 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1140 "DROP": lambda self: self._parse_alter_table_drop(), 1141 "RENAME": lambda self: self._parse_alter_table_rename(), 1142 "SET": lambda self: self._parse_alter_table_set(), 1143 "SWAP": lambda self: self.expression( 1144 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1145 ), 1146 } 1147 1148 ALTER_ALTER_PARSERS = { 1149 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1150 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1151 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1152 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1153 } 1154 1155 SCHEMA_UNNAMED_CONSTRAINTS = { 1156 "CHECK", 1157 "EXCLUDE", 1158 "FOREIGN KEY", 1159 "LIKE", 1160 "PERIOD", 1161 "PRIMARY KEY", 1162 "UNIQUE", 1163 "WATERMARK", 1164 "BUCKET", 1165 "TRUNCATE", 1166 } 1167 1168 NO_PAREN_FUNCTION_PARSERS = { 1169 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1170 "CASE": lambda self: self._parse_case(), 1171 "CONNECT_BY_ROOT": lambda self: self.expression( 1172 
exp.ConnectByRoot, this=self._parse_column() 1173 ), 1174 "IF": lambda self: self._parse_if(), 1175 } 1176 1177 INVALID_FUNC_NAME_TOKENS = { 1178 TokenType.IDENTIFIER, 1179 TokenType.STRING, 1180 } 1181 1182 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1183 1184 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1185 1186 FUNCTION_PARSERS = { 1187 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1188 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1189 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1190 "DECODE": lambda self: self._parse_decode(), 1191 "EXTRACT": lambda self: self._parse_extract(), 1192 "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1193 "GAP_FILL": lambda self: self._parse_gap_fill(), 1194 "JSON_OBJECT": lambda self: self._parse_json_object(), 1195 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1196 "JSON_TABLE": lambda self: self._parse_json_table(), 1197 "MATCH": lambda self: self._parse_match_against(), 1198 "NORMALIZE": lambda self: self._parse_normalize(), 1199 "OPENJSON": lambda self: self._parse_open_json(), 1200 "OVERLAY": lambda self: self._parse_overlay(), 1201 "POSITION": lambda self: self._parse_position(), 1202 "PREDICT": lambda self: self._parse_predict(), 1203 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1204 "STRING_AGG": lambda self: self._parse_string_agg(), 1205 "SUBSTRING": lambda self: self._parse_substring(), 1206 "TRIM": lambda self: self._parse_trim(), 1207 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1208 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1209 "XMLELEMENT": lambda self: self.expression( 1210 exp.XMLElement, 1211 this=self._match_text_seq("NAME") and self._parse_id_var(), 1212 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1213 ), 1214 "XMLTABLE": lambda self: self._parse_xml_table(), 1215 } 1216 1217 QUERY_MODIFIER_PARSERS = { 1218 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1219 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1220 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1221 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1222 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1223 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1224 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1225 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1226 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1227 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1228 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1229 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1230 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1231 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1232 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1233 TokenType.CLUSTER_BY: lambda self: ( 1234 "cluster", 1235 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1236 ), 1237 TokenType.DISTRIBUTE_BY: lambda self: ( 1238 "distribute", 1239 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1240 ), 1241 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1242 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1243 
TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1244 } 1245 1246 SET_PARSERS = { 1247 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1248 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1249 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1250 "TRANSACTION": lambda self: self._parse_set_transaction(), 1251 } 1252 1253 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1254 1255 TYPE_LITERAL_PARSERS = { 1256 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1257 } 1258 1259 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1260 1261 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1262 1263 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1264 1265 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1266 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1267 "ISOLATION": ( 1268 ("LEVEL", "REPEATABLE", "READ"), 1269 ("LEVEL", "READ", "COMMITTED"), 1270 ("LEVEL", "READ", "UNCOMMITTED"), 1271 ("LEVEL", "SERIALIZABLE"), 1272 ), 1273 "READ": ("WRITE", "ONLY"), 1274 } 1275 1276 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1277 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1278 ) 1279 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1280 1281 CREATE_SEQUENCE: OPTIONS_TYPE = { 1282 "SCALE": ("EXTEND", "NOEXTEND"), 1283 "SHARD": ("EXTEND", "NOEXTEND"), 1284 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1285 **dict.fromkeys( 1286 ( 1287 "SESSION", 1288 "GLOBAL", 1289 "KEEP", 1290 "NOKEEP", 1291 "ORDER", 1292 "NOORDER", 1293 "NOCACHE", 1294 "CYCLE", 1295 "NOCYCLE", 1296 "NOMINVALUE", 1297 "NOMAXVALUE", 1298 "NOSCALE", 1299 "NOSHARD", 1300 ), 1301 tuple(), 1302 ), 1303 } 1304 1305 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1306 1307 USABLES: OPTIONS_TYPE = dict.fromkeys( 1308 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1309 ) 1310 1311 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1312 1313 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1314 "TYPE": ("EVOLUTION",), 1315 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1316 } 1317 1318 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1319 1320 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1321 1322 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1323 "NOT": ("ENFORCED",), 1324 "MATCH": ( 1325 "FULL", 1326 "PARTIAL", 1327 "SIMPLE", 1328 ), 1329 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1330 "USING": ( 1331 "BTREE", 1332 "HASH", 1333 ), 1334 **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()), 1335 } 1336 1337 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1338 1339 CLONE_KEYWORDS = {"CLONE", "COPY"} 1340 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1341 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1342 1343 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1344 1345 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1346 1347 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1348 1349 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1350 1351 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1352 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1353 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1354 1355 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1356
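    # Example (illustrative sketch, default dialect assumed): OPTIONS_TYPE tables such as
    # TRANSACTION_CHARACTERISTICS map a leading keyword to the word sequences that may follow
    # it; the parser matches one of the listed sequences (roughly, via _parse_var_from_options),
    # so both READ COMMITTED and READ UNCOMMITTED are accepted under "ISOLATION" and should
    # round-trip unchanged:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SET TRANSACTION ISOLATION LEVEL READ COMMITTED").sql()
    #     'SET TRANSACTION ISOLATION LEVEL READ COMMITTED'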
1357 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1358 1359 ADD_CONSTRAINT_TOKENS = { 1360 TokenType.CONSTRAINT, 1361 TokenType.FOREIGN_KEY, 1362 TokenType.INDEX, 1363 TokenType.KEY, 1364 TokenType.PRIMARY_KEY, 1365 TokenType.UNIQUE, 1366 } 1367 1368 DISTINCT_TOKENS = {TokenType.DISTINCT} 1369 1370 NULL_TOKENS = {TokenType.NULL} 1371 1372 UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS 1373 1374 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1375 1376 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1377 1378 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1379 1380 ODBC_DATETIME_LITERALS = { 1381 "d": exp.Date, 1382 "t": exp.Time, 1383 "ts": exp.Timestamp, 1384 } 1385 1386 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1387 1388 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1389 1390 # The style options for the DESCRIBE statement 1391 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1392 1393 # The style options for the ANALYZE statement 1394 ANALYZE_STYLES = { 1395 "BUFFER_USAGE_LIMIT", 1396 "FULL", 1397 "LOCAL", 1398 "NO_WRITE_TO_BINLOG", 1399 "SAMPLE", 1400 "SKIP_LOCKED", 1401 "VERBOSE", 1402 } 1403 1404 ANALYZE_EXPRESSION_PARSERS = { 1405 "ALL": lambda self: self._parse_analyze_columns(), 1406 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1407 "DELETE": lambda self: self._parse_analyze_delete(), 1408 "DROP": lambda self: self._parse_analyze_histogram(), 1409 "ESTIMATE": lambda self: self._parse_analyze_statistics(), 1410 "LIST": lambda self: self._parse_analyze_list(), 1411 "PREDICATE": lambda self: self._parse_analyze_columns(), 1412 "UPDATE": lambda self: self._parse_analyze_histogram(), 1413 "VALIDATE": lambda self: self._parse_analyze_validate(), 1414 } 1415 1416 PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"} 1417 1418 AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET) 1419 1420 OPERATION_MODIFIERS: t.Set[str] = set() 1421 1422 RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"} 1423 1424 MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows) 1425 1426 STRICT_CAST = True 1427 1428 PREFIXED_PIVOT_COLUMNS = False 1429 IDENTIFY_PIVOT_STRINGS = False 1430 1431 LOG_DEFAULTS_TO_LN = False 1432 1433 # Whether ADD is present for each column added by ALTER TABLE 1434 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1435 1436 # Whether the table sample clause expects CSV syntax 1437 TABLESAMPLE_CSV = False 1438 1439 # The default method used for table sampling 1440 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1441 1442 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1443 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1444 1445 # Whether the TRIM function expects the characters to trim as its first argument 1446 TRIM_PATTERN_FIRST = False 1447 1448 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1449 STRING_ALIASES = False 1450 1451 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1452 MODIFIERS_ATTACHED_TO_SET_OP = True 1453 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1454 1455 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1456 NO_PAREN_IF_COMMANDS = True 1457 1458 # Whether the -> and ->> operators expect documents of type JSON (e.g. 
Postgres) 1459 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1460 1461 # Whether the `:` operator is used to extract a value from a VARIANT column 1462 COLON_IS_VARIANT_EXTRACT = False 1463 1464 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1465 # If this is True and '(' is not found, the keyword will be treated as an identifier 1466 VALUES_FOLLOWED_BY_PAREN = True 1467 1468 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1469 SUPPORTS_IMPLICIT_UNNEST = False 1470 1471 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1472 INTERVAL_SPANS = True 1473 1474 # Whether a PARTITION clause can follow a table reference 1475 SUPPORTS_PARTITION_SELECTION = False 1476 1477 # Whether the `name AS expr` schema/column constraint requires parentheses around `expr` 1478 WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True 1479 1480 # Whether the 'AS' keyword is optional in the CTE definition syntax 1481 OPTIONAL_ALIAS_TOKEN_CTE = True 1482 1483 __slots__ = ( 1484 "error_level", 1485 "error_message_context", 1486 "max_errors", 1487 "dialect", 1488 "sql", 1489 "errors", 1490 "_tokens", 1491 "_index", 1492 "_curr", 1493 "_next", 1494 "_prev", 1495 "_prev_comments", 1496 ) 1497 1498 # Autofilled 1499 SHOW_TRIE: t.Dict = {} 1500 SET_TRIE: t.Dict = {} 1501 1502 def __init__( 1503 self, 1504 error_level: t.Optional[ErrorLevel] = None, 1505 error_message_context: int = 100, 1506 max_errors: int = 3, 1507 dialect: DialectType = None, 1508 ): 1509 from sqlglot.dialects import Dialect 1510 1511 self.error_level = error_level or ErrorLevel.IMMEDIATE 1512 self.error_message_context = error_message_context 1513 self.max_errors = max_errors 1514 self.dialect = Dialect.get_or_raise(dialect) 1515 self.reset() 1516 1517 def reset(self): 1518 self.sql = "" 1519 self.errors = [] 1520 self._tokens = [] 1521 self._index = 0 1522 self._curr = None 1523 self._next = None 1524 self._prev = None 1525 self._prev_comments = None 1526 1527 def parse( 1528 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1529 ) -> t.List[t.Optional[exp.Expression]]: 1530 """ 1531 Parses a list of tokens and returns a list of syntax trees, one tree 1532 per parsed SQL statement. 1533 1534 Args: 1535 raw_tokens: The list of tokens. 1536 sql: The original SQL string, used to produce helpful debug messages. 1537 1538 Returns: 1539 The list of the produced syntax trees. 1540 """ 1541 return self._parse( 1542 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1543 ) 1544 1545 def parse_into( 1546 self, 1547 expression_types: exp.IntoType, 1548 raw_tokens: t.List[Token], 1549 sql: t.Optional[str] = None, 1550 ) -> t.List[t.Optional[exp.Expression]]: 1551 """ 1552 Parses a list of tokens into a given Expression type. If a collection of Expression 1553 types is given instead, this method will try to parse the token list into each one 1554 of them, stopping at the first for which the parsing succeeds. 1555 1556 Args: 1557 expression_types: The expression type(s) to try and parse the token list into. 1558 raw_tokens: The list of tokens. 1559 sql: The original SQL string, used to produce helpful debug messages. 1560 1561 Returns: 1562 The target Expression. 
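        Example:
            A minimal sketch, assuming the default dialect's tokenizer:

            >>> from sqlglot import exp
            >>> from sqlglot.parser import Parser
            >>> from sqlglot.tokens import Tokenizer
            >>> tokens = Tokenizer().tokenize("x = 1")
            >>> Parser().parse_into(exp.Condition, tokens)[0].sql()
            'x = 1'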
1563 """ 1564 errors = [] 1565 for expression_type in ensure_list(expression_types): 1566 parser = self.EXPRESSION_PARSERS.get(expression_type) 1567 if not parser: 1568 raise TypeError(f"No parser registered for {expression_type}") 1569 1570 try: 1571 return self._parse(parser, raw_tokens, sql) 1572 except ParseError as e: 1573 e.errors[0]["into_expression"] = expression_type 1574 errors.append(e) 1575 1576 raise ParseError( 1577 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1578 errors=merge_errors(errors), 1579 ) from errors[-1] 1580 1581 def _parse( 1582 self, 1583 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1584 raw_tokens: t.List[Token], 1585 sql: t.Optional[str] = None, 1586 ) -> t.List[t.Optional[exp.Expression]]: 1587 self.reset() 1588 self.sql = sql or "" 1589 1590 total = len(raw_tokens) 1591 chunks: t.List[t.List[Token]] = [[]] 1592 1593 for i, token in enumerate(raw_tokens): 1594 if token.token_type == TokenType.SEMICOLON: 1595 if token.comments: 1596 chunks.append([token]) 1597 1598 if i < total - 1: 1599 chunks.append([]) 1600 else: 1601 chunks[-1].append(token) 1602 1603 expressions = [] 1604 1605 for tokens in chunks: 1606 self._index = -1 1607 self._tokens = tokens 1608 self._advance() 1609 1610 expressions.append(parse_method(self)) 1611 1612 if self._index < len(self._tokens): 1613 self.raise_error("Invalid expression / Unexpected token") 1614 1615 self.check_errors() 1616 1617 return expressions 1618 1619 def check_errors(self) -> None: 1620 """Logs or raises any found errors, depending on the chosen error level setting.""" 1621 if self.error_level == ErrorLevel.WARN: 1622 for error in self.errors: 1623 logger.error(str(error)) 1624 elif self.error_level == ErrorLevel.RAISE and self.errors: 1625 raise ParseError( 1626 concat_messages(self.errors, self.max_errors), 1627 errors=merge_errors(self.errors), 1628 ) 1629 1630 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1631 """ 1632 Appends an error in the list of recorded errors or raises it, depending on the chosen 1633 error level setting. 1634 """ 1635 token = token or self._curr or self._prev or Token.string("") 1636 start = token.start 1637 end = token.end + 1 1638 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1639 highlight = self.sql[start:end] 1640 end_context = self.sql[end : end + self.error_message_context] 1641 1642 error = ParseError.new( 1643 f"{message}. Line {token.line}, Col: {token.col}.\n" 1644 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1645 description=message, 1646 line=token.line, 1647 col=token.col, 1648 start_context=start_context, 1649 highlight=highlight, 1650 end_context=end_context, 1651 ) 1652 1653 if self.error_level == ErrorLevel.IMMEDIATE: 1654 raise error 1655 1656 self.errors.append(error) 1657 1658 def expression( 1659 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1660 ) -> E: 1661 """ 1662 Creates a new, validated Expression. 1663 1664 Args: 1665 exp_class: The expression class to instantiate. 1666 comments: An optional list of comments to attach to the expression. 1667 kwargs: The arguments to set for the expression along with their respective values. 1668 1669 Returns: 1670 The target expression. 
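        Example:
            A minimal sketch of standalone usage (default dialect assumed):

            >>> from sqlglot import exp
            >>> from sqlglot.parser import Parser
            >>> Parser().expression(exp.Not, this=exp.column("x")).sql()
            'NOT x'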
1671 """ 1672 instance = exp_class(**kwargs) 1673 instance.add_comments(comments) if comments else self._add_comments(instance) 1674 return self.validate_expression(instance) 1675 1676 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1677 if expression and self._prev_comments: 1678 expression.add_comments(self._prev_comments) 1679 self._prev_comments = None 1680 1681 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1682 """ 1683 Validates an Expression, making sure that all its mandatory arguments are set. 1684 1685 Args: 1686 expression: The expression to validate. 1687 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1688 1689 Returns: 1690 The validated expression. 1691 """ 1692 if self.error_level != ErrorLevel.IGNORE: 1693 for error_message in expression.error_messages(args): 1694 self.raise_error(error_message) 1695 1696 return expression 1697 1698 def _find_sql(self, start: Token, end: Token) -> str: 1699 return self.sql[start.start : end.end + 1] 1700 1701 def _is_connected(self) -> bool: 1702 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1703 1704 def _advance(self, times: int = 1) -> None: 1705 self._index += times 1706 self._curr = seq_get(self._tokens, self._index) 1707 self._next = seq_get(self._tokens, self._index + 1) 1708 1709 if self._index > 0: 1710 self._prev = self._tokens[self._index - 1] 1711 self._prev_comments = self._prev.comments 1712 else: 1713 self._prev = None 1714 self._prev_comments = None 1715 1716 def _retreat(self, index: int) -> None: 1717 if index != self._index: 1718 self._advance(index - self._index) 1719 1720 def _warn_unsupported(self) -> None: 1721 if len(self._tokens) <= 1: 1722 return 1723 1724 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1725 # interested in emitting a warning for the one being currently processed. 1726 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1727 1728 logger.warning( 1729 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1730 ) 1731 1732 def _parse_command(self) -> exp.Command: 1733 self._warn_unsupported() 1734 return self.expression( 1735 exp.Command, 1736 comments=self._prev_comments, 1737 this=self._prev.text.upper(), 1738 expression=self._parse_string(), 1739 ) 1740 1741 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1742 """ 1743 Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
1744 This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to 1745 solve this by setting & resetting the parser state accordingly 1746 """ 1747 index = self._index 1748 error_level = self.error_level 1749 1750 self.error_level = ErrorLevel.IMMEDIATE 1751 try: 1752 this = parse_method() 1753 except ParseError: 1754 this = None 1755 finally: 1756 if not this or retreat: 1757 self._retreat(index) 1758 self.error_level = error_level 1759 1760 return this 1761 1762 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1763 start = self._prev 1764 exists = self._parse_exists() if allow_exists else None 1765 1766 self._match(TokenType.ON) 1767 1768 materialized = self._match_text_seq("MATERIALIZED") 1769 kind = self._match_set(self.CREATABLES) and self._prev 1770 if not kind: 1771 return self._parse_as_command(start) 1772 1773 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1774 this = self._parse_user_defined_function(kind=kind.token_type) 1775 elif kind.token_type == TokenType.TABLE: 1776 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1777 elif kind.token_type == TokenType.COLUMN: 1778 this = self._parse_column() 1779 else: 1780 this = self._parse_id_var() 1781 1782 self._match(TokenType.IS) 1783 1784 return self.expression( 1785 exp.Comment, 1786 this=this, 1787 kind=kind.text, 1788 expression=self._parse_string(), 1789 exists=exists, 1790 materialized=materialized, 1791 ) 1792 1793 def _parse_to_table( 1794 self, 1795 ) -> exp.ToTableProperty: 1796 table = self._parse_table_parts(schema=True) 1797 return self.expression(exp.ToTableProperty, this=table) 1798 1799 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1800 def _parse_ttl(self) -> exp.Expression: 1801 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1802 this = self._parse_bitwise() 1803 1804 if self._match_text_seq("DELETE"): 1805 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1806 if self._match_text_seq("RECOMPRESS"): 1807 return self.expression( 1808 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1809 ) 1810 if self._match_text_seq("TO", "DISK"): 1811 return self.expression( 1812 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1813 ) 1814 if self._match_text_seq("TO", "VOLUME"): 1815 return self.expression( 1816 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1817 ) 1818 1819 return this 1820 1821 expressions = self._parse_csv(_parse_ttl_action) 1822 where = self._parse_where() 1823 group = self._parse_group() 1824 1825 aggregates = None 1826 if group and self._match(TokenType.SET): 1827 aggregates = self._parse_csv(self._parse_set_item) 1828 1829 return self.expression( 1830 exp.MergeTreeTTL, 1831 expressions=expressions, 1832 where=where, 1833 group=group, 1834 aggregates=aggregates, 1835 ) 1836 1837 def _parse_statement(self) -> t.Optional[exp.Expression]: 1838 if self._curr is None: 1839 return None 1840 1841 if self._match_set(self.STATEMENT_PARSERS): 1842 comments = self._prev_comments 1843 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 1844 stmt.add_comments(comments, prepend=True) 1845 return stmt 1846 1847 if self._match_set(self.dialect.tokenizer.COMMANDS): 1848 return self._parse_command() 1849 1850 expression = self._parse_expression() 1851 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1852 return self._parse_query_modifiers(expression)
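    # Example (illustrative): _parse_statement tries its three strategies in order, i.e.
    # STATEMENT_PARSERS keyed on the leading token, then the dialect's command tokens,
    # then a bare expression or SELECT:
    #
    #     >>> import sqlglot
    #     >>> type(sqlglot.parse_one("DROP TABLE t")).__name__
    #     'Drop'
    #     >>> type(sqlglot.parse_one("1 + 1")).__name__
    #     'Add'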
1853 1854 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1855 start = self._prev 1856 temporary = self._match(TokenType.TEMPORARY) 1857 materialized = self._match_text_seq("MATERIALIZED") 1858 1859 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1860 if not kind: 1861 return self._parse_as_command(start) 1862 1863 concurrently = self._match_text_seq("CONCURRENTLY") 1864 if_exists = exists or self._parse_exists() 1865 1866 if kind == "COLUMN": 1867 this = self._parse_column() 1868 else: 1869 this = self._parse_table_parts( 1870 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1871 ) 1872 1873 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1874 1875 if self._match(TokenType.L_PAREN, advance=False): 1876 expressions = self._parse_wrapped_csv(self._parse_types) 1877 else: 1878 expressions = None 1879 1880 return self.expression( 1881 exp.Drop, 1882 exists=if_exists, 1883 this=this, 1884 expressions=expressions, 1885 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1886 temporary=temporary, 1887 materialized=materialized, 1888 cascade=self._match_text_seq("CASCADE"), 1889 constraints=self._match_text_seq("CONSTRAINTS"), 1890 purge=self._match_text_seq("PURGE"), 1891 cluster=cluster, 1892 concurrently=concurrently, 1893 ) 1894 1895 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1896 return ( 1897 self._match_text_seq("IF") 1898 and (not not_ or self._match(TokenType.NOT)) 1899 and self._match(TokenType.EXISTS) 1900 ) 1901 1902 def _parse_create(self) -> exp.Create | exp.Command: 1903 # Note: this can't be None because we've matched a statement parser 1904 start = self._prev 1905 1906 replace = ( 1907 start.token_type == TokenType.REPLACE 1908 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1909 or self._match_pair(TokenType.OR, TokenType.ALTER) 1910 ) 1911 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1912 1913 unique = self._match(TokenType.UNIQUE) 1914 1915 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1916 clustered = True 1917 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1918 "COLUMNSTORE" 1919 ): 1920 clustered = False 1921 else: 1922 clustered = None 1923 1924 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1925 self._advance() 1926 1927 properties = None 1928 create_token = self._match_set(self.CREATABLES) and self._prev 1929 1930 if not create_token: 1931 # exp.Properties.Location.POST_CREATE 1932 properties = self._parse_properties() 1933 create_token = self._match_set(self.CREATABLES) and self._prev 1934 1935 if not properties or not create_token: 1936 return self._parse_as_command(start) 1937 1938 concurrently = self._match_text_seq("CONCURRENTLY") 1939 exists = self._parse_exists(not_=True) 1940 this = None 1941 expression: t.Optional[exp.Expression] = None 1942 indexes = None 1943 no_schema_binding = None 1944 begin = None 1945 end = None 1946 clone = None 1947 1948 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1949 nonlocal properties 1950 if properties and temp_props: 1951 properties.expressions.extend(temp_props.expressions) 1952 elif temp_props: 1953 properties = temp_props 1954 1955 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1956 this = self._parse_user_defined_function(kind=create_token.token_type) 1957 1958 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1959 
extend_props(self._parse_properties()) 1960 1961 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1962 extend_props(self._parse_properties()) 1963 1964 if not expression: 1965 if self._match(TokenType.COMMAND): 1966 expression = self._parse_as_command(self._prev) 1967 else: 1968 begin = self._match(TokenType.BEGIN) 1969 return_ = self._match_text_seq("RETURN") 1970 1971 if self._match(TokenType.STRING, advance=False): 1972 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1973 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1974 expression = self._parse_string() 1975 extend_props(self._parse_properties()) 1976 else: 1977 expression = self._parse_user_defined_function_expression() 1978 1979 end = self._match_text_seq("END") 1980 1981 if return_: 1982 expression = self.expression(exp.Return, this=expression) 1983 elif create_token.token_type == TokenType.INDEX: 1984 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c) 1985 if not self._match(TokenType.ON): 1986 index = self._parse_id_var() 1987 anonymous = False 1988 else: 1989 index = None 1990 anonymous = True 1991 1992 this = self._parse_index(index=index, anonymous=anonymous) 1993 elif create_token.token_type in self.DB_CREATABLES: 1994 table_parts = self._parse_table_parts( 1995 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1996 ) 1997 1998 # exp.Properties.Location.POST_NAME 1999 self._match(TokenType.COMMA) 2000 extend_props(self._parse_properties(before=True)) 2001 2002 this = self._parse_schema(this=table_parts) 2003 2004 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2005 extend_props(self._parse_properties()) 2006 2007 has_alias = self._match(TokenType.ALIAS) 2008 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2009 # exp.Properties.Location.POST_ALIAS 2010 extend_props(self._parse_properties()) 2011 2012 if create_token.token_type == TokenType.SEQUENCE: 2013 expression = self._parse_types() 2014 extend_props(self._parse_properties()) 2015 else: 2016 expression = self._parse_ddl_select() 2017 2018 # Some dialects also support using a table as an alias instead of a SELECT. 2019 # Here we fall back to this as an alternative.
2020 if not expression and has_alias: 2021 expression = self._try_parse(self._parse_table_parts) 2022 2023 if create_token.token_type == TokenType.TABLE: 2024 # exp.Properties.Location.POST_EXPRESSION 2025 extend_props(self._parse_properties()) 2026 2027 indexes = [] 2028 while True: 2029 index = self._parse_index() 2030 2031 # exp.Properties.Location.POST_INDEX 2032 extend_props(self._parse_properties()) 2033 if not index: 2034 break 2035 else: 2036 self._match(TokenType.COMMA) 2037 indexes.append(index) 2038 elif create_token.token_type == TokenType.VIEW: 2039 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2040 no_schema_binding = True 2041 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2042 extend_props(self._parse_properties()) 2043 2044 shallow = self._match_text_seq("SHALLOW") 2045 2046 if self._match_texts(self.CLONE_KEYWORDS): 2047 copy = self._prev.text.lower() == "copy" 2048 clone = self.expression( 2049 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2050 ) 2051 2052 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2053 return self._parse_as_command(start) 2054 2055 create_kind_text = create_token.text.upper() 2056 return self.expression( 2057 exp.Create, 2058 this=this, 2059 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2060 replace=replace, 2061 refresh=refresh, 2062 unique=unique, 2063 expression=expression, 2064 exists=exists, 2065 properties=properties, 2066 indexes=indexes, 2067 no_schema_binding=no_schema_binding, 2068 begin=begin, 2069 end=end, 2070 clone=clone, 2071 concurrently=concurrently, 2072 clustered=clustered, 2073 ) 2074 2075 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2076 seq = exp.SequenceProperties() 2077 2078 options = [] 2079 index = self._index 2080 2081 while self._curr: 2082 self._match(TokenType.COMMA) 2083 if self._match_text_seq("INCREMENT"): 2084 self._match_text_seq("BY") 2085 self._match_text_seq("=") 2086 seq.set("increment", self._parse_term()) 2087 elif self._match_text_seq("MINVALUE"): 2088 seq.set("minvalue", self._parse_term()) 2089 elif self._match_text_seq("MAXVALUE"): 2090 seq.set("maxvalue", self._parse_term()) 2091 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2092 self._match_text_seq("=") 2093 seq.set("start", self._parse_term()) 2094 elif self._match_text_seq("CACHE"): 2095 # T-SQL allows empty CACHE which is initialized dynamically 2096 seq.set("cache", self._parse_number() or True) 2097 elif self._match_text_seq("OWNED", "BY"): 2098 # "OWNED BY NONE" is the default 2099 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2100 else: 2101 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2102 if opt: 2103 options.append(opt) 2104 else: 2105 break 2106 2107 seq.set("options", options if options else None) 2108 return None if self._index == index else seq 2109 2110 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2111 # only used for teradata currently 2112 self._match(TokenType.COMMA) 2113 2114 kwargs = { 2115 "no": self._match_text_seq("NO"), 2116 "dual": self._match_text_seq("DUAL"), 2117 "before": self._match_text_seq("BEFORE"), 2118 "default": self._match_text_seq("DEFAULT"), 2119 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2120 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2121 "after": self._match_text_seq("AFTER"), 2122 "minimum": 
self._match_texts(("MIN", "MINIMUM")), 2123 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2124 } 2125 2126 if self._match_texts(self.PROPERTY_PARSERS): 2127 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2128 try: 2129 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2130 except TypeError: 2131 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2132 2133 return None 2134 2135 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2136 return self._parse_wrapped_csv(self._parse_property) 2137 2138 def _parse_property(self) -> t.Optional[exp.Expression]: 2139 if self._match_texts(self.PROPERTY_PARSERS): 2140 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2141 2142 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2143 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2144 2145 if self._match_text_seq("COMPOUND", "SORTKEY"): 2146 return self._parse_sortkey(compound=True) 2147 2148 if self._match_text_seq("SQL", "SECURITY"): 2149 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2150 2151 index = self._index 2152 key = self._parse_column() 2153 2154 if not self._match(TokenType.EQ): 2155 self._retreat(index) 2156 return self._parse_sequence_properties() 2157 2158 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2159 if isinstance(key, exp.Column): 2160 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2161 2162 value = self._parse_bitwise() or self._parse_var(any_token=True) 2163 2164 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2165 if isinstance(value, exp.Column): 2166 value = exp.var(value.name) 2167 2168 return self.expression(exp.Property, this=key, value=value) 2169 2170 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2171 if self._match_text_seq("BY"): 2172 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2173 2174 self._match(TokenType.ALIAS) 2175 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2176 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2177 2178 return self.expression( 2179 exp.FileFormatProperty, 2180 this=( 2181 self.expression( 2182 exp.InputOutputFormat, 2183 input_format=input_format, 2184 output_format=output_format, 2185 ) 2186 if input_format or output_format 2187 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2188 ), 2189 ) 2190 2191 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2192 field = self._parse_field() 2193 if isinstance(field, exp.Identifier) and not field.quoted: 2194 field = exp.var(field) 2195 2196 return field 2197 2198 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2199 self._match(TokenType.EQ) 2200 self._match(TokenType.ALIAS) 2201 2202 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2203 2204 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2205 properties = [] 2206 while True: 2207 if before: 2208 prop = self._parse_property_before() 2209 else: 2210 prop = self._parse_property() 2211 if not prop: 2212 break 2213 for p in ensure_list(prop): 2214 properties.append(p) 2215 2216 if properties: 2217 return self.expression(exp.Properties, expressions=properties) 2218 2219 return None 2220 2221 
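# Illustrative usage of the property machinery above (a hedged sketch, not
# part of the original source): registered options like MySQL's ENGINE become
# dedicated property nodes, while unknown key = value pairs fall through to
# the generic exp.Property branch above; either way they are collected into an
# exp.Properties container on the parent DDL expression.
#
#   import sqlglot
#   ast = sqlglot.parse_one("CREATE TABLE t (x INT) ENGINE=InnoDB", read="mysql")
#   props = ast.args.get("properties")  # exp.Properties
#   print([p.sql("mysql") for p in props.expressions])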
def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2222 return self.expression( 2223 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2224 ) 2225 2226 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2227 if self._match_texts(("DEFINER", "INVOKER")): 2228 security_specifier = self._prev.text.upper() 2229 return self.expression(exp.SecurityProperty, this=security_specifier) 2230 return None 2231 2232 def _parse_settings_property(self) -> exp.SettingsProperty: 2233 return self.expression( 2234 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2235 ) 2236 2237 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2238 if self._index >= 2: 2239 pre_volatile_token = self._tokens[self._index - 2] 2240 else: 2241 pre_volatile_token = None 2242 2243 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2244 return exp.VolatileProperty() 2245 2246 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2247 2248 def _parse_retention_period(self) -> exp.Var: 2249 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2250 number = self._parse_number() 2251 number_str = f"{number} " if number else "" 2252 unit = self._parse_var(any_token=True) 2253 return exp.var(f"{number_str}{unit}") 2254 2255 def _parse_system_versioning_property( 2256 self, with_: bool = False 2257 ) -> exp.WithSystemVersioningProperty: 2258 self._match(TokenType.EQ) 2259 prop = self.expression( 2260 exp.WithSystemVersioningProperty, 2261 **{ # type: ignore 2262 "on": True, 2263 "with": with_, 2264 }, 2265 ) 2266 2267 if self._match_text_seq("OFF"): 2268 prop.set("on", False) 2269 return prop 2270 2271 self._match(TokenType.ON) 2272 if self._match(TokenType.L_PAREN): 2273 while self._curr and not self._match(TokenType.R_PAREN): 2274 if self._match_text_seq("HISTORY_TABLE", "="): 2275 prop.set("this", self._parse_table_parts()) 2276 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2277 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2278 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2279 prop.set("retention_period", self._parse_retention_period()) 2280 2281 self._match(TokenType.COMMA) 2282 2283 return prop 2284 2285 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2286 self._match(TokenType.EQ) 2287 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2288 prop = self.expression(exp.DataDeletionProperty, on=on) 2289 2290 if self._match(TokenType.L_PAREN): 2291 while self._curr and not self._match(TokenType.R_PAREN): 2292 if self._match_text_seq("FILTER_COLUMN", "="): 2293 prop.set("filter_column", self._parse_column()) 2294 elif self._match_text_seq("RETENTION_PERIOD", "="): 2295 prop.set("retention_period", self._parse_retention_period()) 2296 2297 self._match(TokenType.COMMA) 2298 2299 return prop 2300 2301 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2302 kind = "HASH" 2303 expressions: t.Optional[t.List[exp.Expression]] = None 2304 if self._match_text_seq("BY", "HASH"): 2305 expressions = self._parse_wrapped_csv(self._parse_id_var) 2306 elif self._match_text_seq("BY", "RANDOM"): 2307 kind = "RANDOM" 2308 2309 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2310 buckets: t.Optional[exp.Expression] = None 2311 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2312 buckets = 
self._parse_number() 2313 2314 return self.expression( 2315 exp.DistributedByProperty, 2316 expressions=expressions, 2317 kind=kind, 2318 buckets=buckets, 2319 order=self._parse_order(), 2320 ) 2321 2322 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2323 self._match_text_seq("KEY") 2324 expressions = self._parse_wrapped_id_vars() 2325 return self.expression(expr_type, expressions=expressions) 2326 2327 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2328 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2329 prop = self._parse_system_versioning_property(with_=True) 2330 self._match_r_paren() 2331 return prop 2332 2333 if self._match(TokenType.L_PAREN, advance=False): 2334 return self._parse_wrapped_properties() 2335 2336 if self._match_text_seq("JOURNAL"): 2337 return self._parse_withjournaltable() 2338 2339 if self._match_texts(self.VIEW_ATTRIBUTES): 2340 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2341 2342 if self._match_text_seq("DATA"): 2343 return self._parse_withdata(no=False) 2344 elif self._match_text_seq("NO", "DATA"): 2345 return self._parse_withdata(no=True) 2346 2347 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2348 return self._parse_serde_properties(with_=True) 2349 2350 if self._match(TokenType.SCHEMA): 2351 return self.expression( 2352 exp.WithSchemaBindingProperty, 2353 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2354 ) 2355 2356 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2357 return self.expression( 2358 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2359 ) 2360 2361 if not self._next: 2362 return None 2363 2364 return self._parse_withisolatedloading() 2365 2366 def _parse_procedure_option(self) -> exp.Expression | None: 2367 if self._match_text_seq("EXECUTE", "AS"): 2368 return self.expression( 2369 exp.ExecuteAsProperty, 2370 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2371 or self._parse_string(), 2372 ) 2373 2374 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2375 2376 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2377 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2378 self._match(TokenType.EQ) 2379 2380 user = self._parse_id_var() 2381 self._match(TokenType.PARAMETER) 2382 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2383 2384 if not user or not host: 2385 return None 2386 2387 return exp.DefinerProperty(this=f"{user}@{host}") 2388 2389 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2390 self._match(TokenType.TABLE) 2391 self._match(TokenType.EQ) 2392 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2393 2394 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2395 return self.expression(exp.LogProperty, no=no) 2396 2397 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2398 return self.expression(exp.JournalProperty, **kwargs) 2399 2400 def _parse_checksum(self) -> exp.ChecksumProperty: 2401 self._match(TokenType.EQ) 2402 2403 on = None 2404 if self._match(TokenType.ON): 2405 on = True 2406 elif self._match_text_seq("OFF"): 2407 on = False 2408 2409 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2410 2411 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2412 return self.expression( 2413 exp.Cluster, 2414 expressions=( 2415 
self._parse_wrapped_csv(self._parse_ordered) 2416 if wrapped 2417 else self._parse_csv(self._parse_ordered) 2418 ), 2419 ) 2420 2421 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2422 self._match_text_seq("BY") 2423 2424 self._match_l_paren() 2425 expressions = self._parse_csv(self._parse_column) 2426 self._match_r_paren() 2427 2428 if self._match_text_seq("SORTED", "BY"): 2429 self._match_l_paren() 2430 sorted_by = self._parse_csv(self._parse_ordered) 2431 self._match_r_paren() 2432 else: 2433 sorted_by = None 2434 2435 self._match(TokenType.INTO) 2436 buckets = self._parse_number() 2437 self._match_text_seq("BUCKETS") 2438 2439 return self.expression( 2440 exp.ClusteredByProperty, 2441 expressions=expressions, 2442 sorted_by=sorted_by, 2443 buckets=buckets, 2444 ) 2445 2446 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2447 if not self._match_text_seq("GRANTS"): 2448 self._retreat(self._index - 1) 2449 return None 2450 2451 return self.expression(exp.CopyGrantsProperty) 2452 2453 def _parse_freespace(self) -> exp.FreespaceProperty: 2454 self._match(TokenType.EQ) 2455 return self.expression( 2456 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2457 ) 2458 2459 def _parse_mergeblockratio( 2460 self, no: bool = False, default: bool = False 2461 ) -> exp.MergeBlockRatioProperty: 2462 if self._match(TokenType.EQ): 2463 return self.expression( 2464 exp.MergeBlockRatioProperty, 2465 this=self._parse_number(), 2466 percent=self._match(TokenType.PERCENT), 2467 ) 2468 2469 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2470 2471 def _parse_datablocksize( 2472 self, 2473 default: t.Optional[bool] = None, 2474 minimum: t.Optional[bool] = None, 2475 maximum: t.Optional[bool] = None, 2476 ) -> exp.DataBlocksizeProperty: 2477 self._match(TokenType.EQ) 2478 size = self._parse_number() 2479 2480 units = None 2481 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2482 units = self._prev.text 2483 2484 return self.expression( 2485 exp.DataBlocksizeProperty, 2486 size=size, 2487 units=units, 2488 default=default, 2489 minimum=minimum, 2490 maximum=maximum, 2491 ) 2492 2493 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2494 self._match(TokenType.EQ) 2495 always = self._match_text_seq("ALWAYS") 2496 manual = self._match_text_seq("MANUAL") 2497 never = self._match_text_seq("NEVER") 2498 default = self._match_text_seq("DEFAULT") 2499 2500 autotemp = None 2501 if self._match_text_seq("AUTOTEMP"): 2502 autotemp = self._parse_schema() 2503 2504 return self.expression( 2505 exp.BlockCompressionProperty, 2506 always=always, 2507 manual=manual, 2508 never=never, 2509 default=default, 2510 autotemp=autotemp, 2511 ) 2512 2513 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2514 index = self._index 2515 no = self._match_text_seq("NO") 2516 concurrent = self._match_text_seq("CONCURRENT") 2517 2518 if not self._match_text_seq("ISOLATED", "LOADING"): 2519 self._retreat(index) 2520 return None 2521 2522 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2523 return self.expression( 2524 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2525 ) 2526 2527 def _parse_locking(self) -> exp.LockingProperty: 2528 if self._match(TokenType.TABLE): 2529 kind = "TABLE" 2530 elif self._match(TokenType.VIEW): 2531 kind = "VIEW" 2532 elif self._match(TokenType.ROW): 2533 kind = "ROW" 2534 elif 
self._match_text_seq("DATABASE"): 2535 kind = "DATABASE" 2536 else: 2537 kind = None 2538 2539 if kind in ("DATABASE", "TABLE", "VIEW"): 2540 this = self._parse_table_parts() 2541 else: 2542 this = None 2543 2544 if self._match(TokenType.FOR): 2545 for_or_in = "FOR" 2546 elif self._match(TokenType.IN): 2547 for_or_in = "IN" 2548 else: 2549 for_or_in = None 2550 2551 if self._match_text_seq("ACCESS"): 2552 lock_type = "ACCESS" 2553 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2554 lock_type = "EXCLUSIVE" 2555 elif self._match_text_seq("SHARE"): 2556 lock_type = "SHARE" 2557 elif self._match_text_seq("READ"): 2558 lock_type = "READ" 2559 elif self._match_text_seq("WRITE"): 2560 lock_type = "WRITE" 2561 elif self._match_text_seq("CHECKSUM"): 2562 lock_type = "CHECKSUM" 2563 else: 2564 lock_type = None 2565 2566 override = self._match_text_seq("OVERRIDE") 2567 2568 return self.expression( 2569 exp.LockingProperty, 2570 this=this, 2571 kind=kind, 2572 for_or_in=for_or_in, 2573 lock_type=lock_type, 2574 override=override, 2575 ) 2576 2577 def _parse_partition_by(self) -> t.List[exp.Expression]: 2578 if self._match(TokenType.PARTITION_BY): 2579 return self._parse_csv(self._parse_assignment) 2580 return [] 2581 2582 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2583 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2584 if self._match_text_seq("MINVALUE"): 2585 return exp.var("MINVALUE") 2586 if self._match_text_seq("MAXVALUE"): 2587 return exp.var("MAXVALUE") 2588 return self._parse_bitwise() 2589 2590 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2591 expression = None 2592 from_expressions = None 2593 to_expressions = None 2594 2595 if self._match(TokenType.IN): 2596 this = self._parse_wrapped_csv(self._parse_bitwise) 2597 elif self._match(TokenType.FROM): 2598 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2599 self._match_text_seq("TO") 2600 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2601 elif self._match_text_seq("WITH", "(", "MODULUS"): 2602 this = self._parse_number() 2603 self._match_text_seq(",", "REMAINDER") 2604 expression = self._parse_number() 2605 self._match_r_paren() 2606 else: 2607 self.raise_error("Failed to parse partition bound spec.") 2608 2609 return self.expression( 2610 exp.PartitionBoundSpec, 2611 this=this, 2612 expression=expression, 2613 from_expressions=from_expressions, 2614 to_expressions=to_expressions, 2615 ) 2616 2617 # https://www.postgresql.org/docs/current/sql-createtable.html 2618 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2619 if not self._match_text_seq("OF"): 2620 self._retreat(self._index - 1) 2621 return None 2622 2623 this = self._parse_table(schema=True) 2624 2625 if self._match(TokenType.DEFAULT): 2626 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2627 elif self._match_text_seq("FOR", "VALUES"): 2628 expression = self._parse_partition_bound_spec() 2629 else: 2630 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2631 2632 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2633 2634 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2635 self._match(TokenType.EQ) 2636 return self.expression( 2637 exp.PartitionedByProperty, 2638 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2639 ) 2640 2641 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2642 if self._match_text_seq("AND", "STATISTICS"): 2643 
statistics = True 2644 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2645 statistics = False 2646 else: 2647 statistics = None 2648 2649 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2650 2651 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2652 if self._match_text_seq("SQL"): 2653 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2654 return None 2655 2656 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2657 if self._match_text_seq("SQL", "DATA"): 2658 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2659 return None 2660 2661 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2662 if self._match_text_seq("PRIMARY", "INDEX"): 2663 return exp.NoPrimaryIndexProperty() 2664 if self._match_text_seq("SQL"): 2665 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2666 return None 2667 2668 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2669 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2670 return exp.OnCommitProperty() 2671 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2672 return exp.OnCommitProperty(delete=True) 2673 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2674 2675 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2676 if self._match_text_seq("SQL", "DATA"): 2677 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2678 return None 2679 2680 def _parse_distkey(self) -> exp.DistKeyProperty: 2681 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2682 2683 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2684 table = self._parse_table(schema=True) 2685 2686 options = [] 2687 while self._match_texts(("INCLUDING", "EXCLUDING")): 2688 this = self._prev.text.upper() 2689 2690 id_var = self._parse_id_var() 2691 if not id_var: 2692 return None 2693 2694 options.append( 2695 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2696 ) 2697 2698 return self.expression(exp.LikeProperty, this=table, expressions=options) 2699 2700 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2701 return self.expression( 2702 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2703 ) 2704 2705 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2706 self._match(TokenType.EQ) 2707 return self.expression( 2708 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2709 ) 2710 2711 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2712 self._match_text_seq("WITH", "CONNECTION") 2713 return self.expression( 2714 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2715 ) 2716 2717 def _parse_returns(self) -> exp.ReturnsProperty: 2718 value: t.Optional[exp.Expression] 2719 null = None 2720 is_table = self._match(TokenType.TABLE) 2721 2722 if is_table: 2723 if self._match(TokenType.LT): 2724 value = self.expression( 2725 exp.Schema, 2726 this="TABLE", 2727 expressions=self._parse_csv(self._parse_struct_types), 2728 ) 2729 if not self._match(TokenType.GT): 2730 self.raise_error("Expecting >") 2731 else: 2732 value = self._parse_schema(exp.var("TABLE")) 2733 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2734 null = True 2735 value = None 2736 else: 2737 value = self._parse_types() 2738 2739 return 
self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2740 2741 def _parse_describe(self) -> exp.Describe: 2742 kind = self._match_set(self.CREATABLES) and self._prev.text 2743 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2744 if self._match(TokenType.DOT): 2745 style = None 2746 self._retreat(self._index - 2) 2747 2748 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2749 2750 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2751 this = self._parse_statement() 2752 else: 2753 this = self._parse_table(schema=True) 2754 2755 properties = self._parse_properties() 2756 expressions = properties.expressions if properties else None 2757 partition = self._parse_partition() 2758 return self.expression( 2759 exp.Describe, 2760 this=this, 2761 style=style, 2762 kind=kind, 2763 expressions=expressions, 2764 partition=partition, 2765 format=format, 2766 ) 2767 2768 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2769 kind = self._prev.text.upper() 2770 expressions = [] 2771 2772 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2773 if self._match(TokenType.WHEN): 2774 expression = self._parse_disjunction() 2775 self._match(TokenType.THEN) 2776 else: 2777 expression = None 2778 2779 else_ = self._match(TokenType.ELSE) 2780 2781 if not self._match(TokenType.INTO): 2782 return None 2783 2784 return self.expression( 2785 exp.ConditionalInsert, 2786 this=self.expression( 2787 exp.Insert, 2788 this=self._parse_table(schema=True), 2789 expression=self._parse_derived_table_values(), 2790 ), 2791 expression=expression, 2792 else_=else_, 2793 ) 2794 2795 expression = parse_conditional_insert() 2796 while expression is not None: 2797 expressions.append(expression) 2798 expression = parse_conditional_insert() 2799 2800 return self.expression( 2801 exp.MultitableInserts, 2802 kind=kind, 2803 comments=comments, 2804 expressions=expressions, 2805 source=self._parse_table(), 2806 ) 2807 2808 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2809 comments = [] 2810 hint = self._parse_hint() 2811 overwrite = self._match(TokenType.OVERWRITE) 2812 ignore = self._match(TokenType.IGNORE) 2813 local = self._match_text_seq("LOCAL") 2814 alternative = None 2815 is_function = None 2816 2817 if self._match_text_seq("DIRECTORY"): 2818 this: t.Optional[exp.Expression] = self.expression( 2819 exp.Directory, 2820 this=self._parse_var_or_string(), 2821 local=local, 2822 row_format=self._parse_row_format(match_row=True), 2823 ) 2824 else: 2825 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2826 comments += ensure_list(self._prev_comments) 2827 return self._parse_multitable_inserts(comments) 2828 2829 if self._match(TokenType.OR): 2830 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2831 2832 self._match(TokenType.INTO) 2833 comments += ensure_list(self._prev_comments) 2834 self._match(TokenType.TABLE) 2835 is_function = self._match(TokenType.FUNCTION) 2836 2837 this = ( 2838 self._parse_table(schema=True, parse_partition=True) 2839 if not is_function 2840 else self._parse_function() 2841 ) 2842 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2843 this.set("alias", self._parse_table_alias()) 2844 2845 returning = self._parse_returning() 2846 2847 return self.expression( 2848 exp.Insert, 2849 comments=comments, 2850 hint=hint, 2851 is_function=is_function, 2852 this=this, 
2853 stored=self._match_text_seq("STORED") and self._parse_stored(), 2854 by_name=self._match_text_seq("BY", "NAME"), 2855 exists=self._parse_exists(), 2856 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2857 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2858 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2859 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2860 conflict=self._parse_on_conflict(), 2861 returning=returning or self._parse_returning(), 2862 overwrite=overwrite, 2863 alternative=alternative, 2864 ignore=ignore, 2865 source=self._match(TokenType.TABLE) and self._parse_table(), 2866 ) 2867 2868 def _parse_kill(self) -> exp.Kill: 2869 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2870 2871 return self.expression( 2872 exp.Kill, 2873 this=self._parse_primary(), 2874 kind=kind, 2875 ) 2876 2877 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2878 conflict = self._match_text_seq("ON", "CONFLICT") 2879 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2880 2881 if not conflict and not duplicate: 2882 return None 2883 2884 conflict_keys = None 2885 constraint = None 2886 2887 if conflict: 2888 if self._match_text_seq("ON", "CONSTRAINT"): 2889 constraint = self._parse_id_var() 2890 elif self._match(TokenType.L_PAREN): 2891 conflict_keys = self._parse_csv(self._parse_id_var) 2892 self._match_r_paren() 2893 2894 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2895 if self._prev.token_type == TokenType.UPDATE: 2896 self._match(TokenType.SET) 2897 expressions = self._parse_csv(self._parse_equality) 2898 else: 2899 expressions = None 2900 2901 return self.expression( 2902 exp.OnConflict, 2903 duplicate=duplicate, 2904 expressions=expressions, 2905 action=action, 2906 conflict_keys=conflict_keys, 2907 constraint=constraint, 2908 where=self._parse_where(), 2909 ) 2910 2911 def _parse_returning(self) -> t.Optional[exp.Returning]: 2912 if not self._match(TokenType.RETURNING): 2913 return None 2914 return self.expression( 2915 exp.Returning, 2916 expressions=self._parse_csv(self._parse_expression), 2917 into=self._match(TokenType.INTO) and self._parse_table_part(), 2918 ) 2919 2920 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2921 if not self._match(TokenType.FORMAT): 2922 return None 2923 return self._parse_row_format() 2924 2925 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2926 index = self._index 2927 with_ = with_ or self._match_text_seq("WITH") 2928 2929 if not self._match(TokenType.SERDE_PROPERTIES): 2930 self._retreat(index) 2931 return None 2932 return self.expression( 2933 exp.SerdeProperties, 2934 **{ # type: ignore 2935 "expressions": self._parse_wrapped_properties(), 2936 "with": with_, 2937 }, 2938 ) 2939 2940 def _parse_row_format( 2941 self, match_row: bool = False 2942 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2943 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2944 return None 2945 2946 if self._match_text_seq("SERDE"): 2947 this = self._parse_string() 2948 2949 serde_properties = self._parse_serde_properties() 2950 2951 return self.expression( 2952 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2953 ) 2954 2955 self._match_text_seq("DELIMITED") 2956 2957 kwargs = {} 2958 2959 if 
self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2960 kwargs["fields"] = self._parse_string() 2961 if self._match_text_seq("ESCAPED", "BY"): 2962 kwargs["escaped"] = self._parse_string() 2963 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2964 kwargs["collection_items"] = self._parse_string() 2965 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2966 kwargs["map_keys"] = self._parse_string() 2967 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2968 kwargs["lines"] = self._parse_string() 2969 if self._match_text_seq("NULL", "DEFINED", "AS"): 2970 kwargs["null"] = self._parse_string() 2971 2972 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2973 2974 def _parse_load(self) -> exp.LoadData | exp.Command: 2975 if self._match_text_seq("DATA"): 2976 local = self._match_text_seq("LOCAL") 2977 self._match_text_seq("INPATH") 2978 inpath = self._parse_string() 2979 overwrite = self._match(TokenType.OVERWRITE) 2980 self._match_pair(TokenType.INTO, TokenType.TABLE) 2981 2982 return self.expression( 2983 exp.LoadData, 2984 this=self._parse_table(schema=True), 2985 local=local, 2986 overwrite=overwrite, 2987 inpath=inpath, 2988 partition=self._parse_partition(), 2989 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2990 serde=self._match_text_seq("SERDE") and self._parse_string(), 2991 ) 2992 return self._parse_as_command(self._prev) 2993 2994 def _parse_delete(self) -> exp.Delete: 2995 # This handles MySQL's "Multiple-Table Syntax" 2996 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2997 tables = None 2998 if not self._match(TokenType.FROM, advance=False): 2999 tables = self._parse_csv(self._parse_table) or None 3000 3001 returning = self._parse_returning() 3002 3003 return self.expression( 3004 exp.Delete, 3005 tables=tables, 3006 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3007 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3008 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3009 where=self._parse_where(), 3010 returning=returning or self._parse_returning(), 3011 limit=self._parse_limit(), 3012 ) 3013 3014 def _parse_update(self) -> exp.Update: 3015 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 3016 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 3017 returning = self._parse_returning() 3018 return self.expression( 3019 exp.Update, 3020 **{ # type: ignore 3021 "this": this, 3022 "expressions": expressions, 3023 "from": self._parse_from(joins=True), 3024 "where": self._parse_where(), 3025 "returning": returning or self._parse_returning(), 3026 "order": self._parse_order(), 3027 "limit": self._parse_limit(), 3028 }, 3029 ) 3030 3031 def _parse_use(self) -> exp.Use: 3032 return self.expression( 3033 exp.Use, 3034 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3035 this=self._parse_table(schema=False), 3036 ) 3037 3038 def _parse_uncache(self) -> exp.Uncache: 3039 if not self._match(TokenType.TABLE): 3040 self.raise_error("Expecting TABLE after UNCACHE") 3041 3042 return self.expression( 3043 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3044 ) 3045 3046 def _parse_cache(self) -> exp.Cache: 3047 lazy = self._match_text_seq("LAZY") 3048 self._match(TokenType.TABLE) 3049 table = self._parse_table(schema=True) 3050 3051 options = [] 3052 if self._match_text_seq("OPTIONS"): 3053 self._match_l_paren() 3054 k = 
self._parse_string() 3055 self._match(TokenType.EQ) 3056 v = self._parse_string() 3057 options = [k, v] 3058 self._match_r_paren() 3059 3060 self._match(TokenType.ALIAS) 3061 return self.expression( 3062 exp.Cache, 3063 this=table, 3064 lazy=lazy, 3065 options=options, 3066 expression=self._parse_select(nested=True), 3067 ) 3068 3069 def _parse_partition(self) -> t.Optional[exp.Partition]: 3070 if not self._match_texts(self.PARTITION_KEYWORDS): 3071 return None 3072 3073 return self.expression( 3074 exp.Partition, 3075 subpartition=self._prev.text.upper() == "SUBPARTITION", 3076 expressions=self._parse_wrapped_csv(self._parse_assignment), 3077 ) 3078 3079 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3080 def _parse_value_expression() -> t.Optional[exp.Expression]: 3081 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3082 return exp.var(self._prev.text.upper()) 3083 return self._parse_expression() 3084 3085 if self._match(TokenType.L_PAREN): 3086 expressions = self._parse_csv(_parse_value_expression) 3087 self._match_r_paren() 3088 return self.expression(exp.Tuple, expressions=expressions) 3089 3090 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 3091 expression = self._parse_expression() 3092 if expression: 3093 return self.expression(exp.Tuple, expressions=[expression]) 3094 return None 3095 3096 def _parse_projections(self) -> t.List[exp.Expression]: 3097 return self._parse_expressions() 3098 3099 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3100 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3101 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3102 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3103 ) 3104 elif self._match(TokenType.FROM): 3105 from_ = self._parse_from(skip_from_token=True) 3106 # Support parentheses for duckdb FROM-first syntax 3107 select = self._parse_select() 3108 if select: 3109 select.set("from", from_) 3110 this = select 3111 else: 3112 this = exp.select("*").from_(t.cast(exp.From, from_)) 3113 else: 3114 this = ( 3115 self._parse_table() 3116 if table 3117 else self._parse_select(nested=True, parse_set_operation=False) 3118 ) 3119 3120 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3121 # in case a modifier (e.g. 
join) is following 3122 if table and isinstance(this, exp.Values) and this.alias: 3123 alias = this.args["alias"].pop() 3124 this = exp.Table(this=this, alias=alias) 3125 3126 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3127 3128 return this 3129 3130 def _parse_select( 3131 self, 3132 nested: bool = False, 3133 table: bool = False, 3134 parse_subquery_alias: bool = True, 3135 parse_set_operation: bool = True, 3136 ) -> t.Optional[exp.Expression]: 3137 cte = self._parse_with() 3138 3139 if cte: 3140 this = self._parse_statement() 3141 3142 if not this: 3143 self.raise_error("Failed to parse any statement following CTE") 3144 return cte 3145 3146 if "with" in this.arg_types: 3147 this.set("with", cte) 3148 else: 3149 self.raise_error(f"{this.key} does not support CTE") 3150 this = cte 3151 3152 return this 3153 3154 # duckdb supports leading with FROM x 3155 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 3156 3157 if self._match(TokenType.SELECT): 3158 comments = self._prev_comments 3159 3160 hint = self._parse_hint() 3161 3162 if self._next and not self._next.token_type == TokenType.DOT: 3163 all_ = self._match(TokenType.ALL) 3164 distinct = self._match_set(self.DISTINCT_TOKENS) 3165 else: 3166 all_, distinct = None, None 3167 3168 kind = ( 3169 self._match(TokenType.ALIAS) 3170 and self._match_texts(("STRUCT", "VALUE")) 3171 and self._prev.text.upper() 3172 ) 3173 3174 if distinct: 3175 distinct = self.expression( 3176 exp.Distinct, 3177 on=self._parse_value(values=False) if self._match(TokenType.ON) else None, 3178 ) 3179 3180 if all_ and distinct: 3181 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3182 3183 operation_modifiers = [] 3184 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3185 operation_modifiers.append(exp.var(self._prev.text.upper())) 3186 3187 limit = self._parse_limit(top=True) 3188 projections = self._parse_projections() 3189 3190 this = self.expression( 3191 exp.Select, 3192 kind=kind, 3193 hint=hint, 3194 distinct=distinct, 3195 expressions=projections, 3196 limit=limit, 3197 operation_modifiers=operation_modifiers or None, 3198 ) 3199 this.comments = comments 3200 3201 into = self._parse_into() 3202 if into: 3203 this.set("into", into) 3204 3205 if not from_: 3206 from_ = self._parse_from() 3207 3208 if from_: 3209 this.set("from", from_) 3210 3211 this = self._parse_query_modifiers(this) 3212 elif (table or nested) and self._match(TokenType.L_PAREN): 3213 this = self._parse_wrapped_select(table=table) 3214 3215 # We return early here so that the UNION isn't attached to the subquery by the 3216 # following call to _parse_set_operations, but instead becomes the parent node 3217 self._match_r_paren() 3218 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3219 elif self._match(TokenType.VALUES, advance=False): 3220 this = self._parse_derived_table_values() 3221 elif from_: 3222 this = exp.select("*").from_(from_.this, copy=False) 3223 elif self._match(TokenType.SUMMARIZE): 3224 table = self._match(TokenType.TABLE) 3225 this = self._parse_select() or self._parse_string() or self._parse_table() 3226 return self.expression(exp.Summarize, this=this, table=table) 3227 elif self._match(TokenType.DESCRIBE): 3228 this = self._parse_describe() 3229 elif self._match_text_seq("STREAM"): 3230 this = self._parse_function() 3231 if this: 3232 this = self.expression(exp.Stream, this=this) 3233 else: 3234 self._retreat(self._index - 1) 3235 else: 3236 this = None 
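# Hedged note (not part of the original source): set operations are attached
# by the _parse_set_operations call below, so for a query like the following
# the returned root is the set operation rather than the leading SELECT.
#
#   import sqlglot
#   from sqlglot import exp
#   ast = sqlglot.parse_one("SELECT 1 UNION SELECT 2")
#   assert isinstance(ast, exp.Union)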
3237 3238 return self._parse_set_operations(this) if parse_set_operation else this 3239 3240 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3241 self._match_text_seq("SEARCH") 3242 3243 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3244 3245 if not kind: 3246 return None 3247 3248 self._match_text_seq("FIRST", "BY") 3249 3250 return self.expression( 3251 exp.RecursiveWithSearch, 3252 kind=kind, 3253 this=self._parse_id_var(), 3254 expression=self._match_text_seq("SET") and self._parse_id_var(), 3255 using=self._match_text_seq("USING") and self._parse_id_var(), 3256 ) 3257 3258 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3259 if not skip_with_token and not self._match(TokenType.WITH): 3260 return None 3261 3262 comments = self._prev_comments 3263 recursive = self._match(TokenType.RECURSIVE) 3264 3265 last_comments = None 3266 expressions = [] 3267 while True: 3268 cte = self._parse_cte() 3269 if isinstance(cte, exp.CTE): 3270 expressions.append(cte) 3271 if last_comments: 3272 cte.add_comments(last_comments) 3273 3274 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3275 break 3276 else: 3277 self._match(TokenType.WITH) 3278 3279 last_comments = self._prev_comments 3280 3281 return self.expression( 3282 exp.With, 3283 comments=comments, 3284 expressions=expressions, 3285 recursive=recursive, 3286 search=self._parse_recursive_with_search(), 3287 ) 3288 3289 def _parse_cte(self) -> t.Optional[exp.CTE]: 3290 index = self._index 3291 3292 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3293 if not alias or not alias.this: 3294 self.raise_error("Expected CTE to have alias") 3295 3296 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3297 self._retreat(index) 3298 return None 3299 3300 comments = self._prev_comments 3301 3302 if self._match_text_seq("NOT", "MATERIALIZED"): 3303 materialized = False 3304 elif self._match_text_seq("MATERIALIZED"): 3305 materialized = True 3306 else: 3307 materialized = None 3308 3309 cte = self.expression( 3310 exp.CTE, 3311 this=self._parse_wrapped(self._parse_statement), 3312 alias=alias, 3313 materialized=materialized, 3314 comments=comments, 3315 ) 3316 3317 if isinstance(cte.this, exp.Values): 3318 cte.set("this", exp.select("*").from_(exp.alias_(cte.this, "_values", table=True))) 3319 3320 return cte 3321 3322 def _parse_table_alias( 3323 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3324 ) -> t.Optional[exp.TableAlias]: 3325 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3326 # so this section tries to parse the clause version and if it fails, it treats the token 3327 # as an identifier (alias) 3328 if self._can_parse_limit_or_offset(): 3329 return None 3330 3331 any_token = self._match(TokenType.ALIAS) 3332 alias = ( 3333 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3334 or self._parse_string_as_identifier() 3335 ) 3336 3337 index = self._index 3338 if self._match(TokenType.L_PAREN): 3339 columns = self._parse_csv(self._parse_function_parameter) 3340 self._match_r_paren() if columns else self._retreat(index) 3341 else: 3342 columns = None 3343 3344 if not alias and not columns: 3345 return None 3346 3347 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3348 3349 # We bubble up comments from the Identifier to the TableAlias 3350 if isinstance(alias, exp.Identifier): 3351 
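# (Hedged aside, not part of the original source: pop_comments detaches the
# comment list from the Identifier so the comments are emitted once, on the
# TableAlias, when the tree is rendered back to SQL.)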
table_alias.add_comments(alias.pop_comments()) 3352 3353 return table_alias 3354 3355 def _parse_subquery( 3356 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3357 ) -> t.Optional[exp.Subquery]: 3358 if not this: 3359 return None 3360 3361 return self.expression( 3362 exp.Subquery, 3363 this=this, 3364 pivots=self._parse_pivots(), 3365 alias=self._parse_table_alias() if parse_alias else None, 3366 sample=self._parse_table_sample(), 3367 ) 3368 3369 def _implicit_unnests_to_explicit(self, this: E) -> E: 3370 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3371 3372 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3373 for i, join in enumerate(this.args.get("joins") or []): 3374 table = join.this 3375 normalized_table = table.copy() 3376 normalized_table.meta["maybe_column"] = True 3377 normalized_table = _norm(normalized_table, dialect=self.dialect) 3378 3379 if isinstance(table, exp.Table) and not join.args.get("on"): 3380 if normalized_table.parts[0].name in refs: 3381 table_as_column = table.to_column() 3382 unnest = exp.Unnest(expressions=[table_as_column]) 3383 3384 # Table.to_column creates a parent Alias node that we want to convert to 3385 # a TableAlias and attach to the Unnest, so it matches the parser's output 3386 if isinstance(table.args.get("alias"), exp.TableAlias): 3387 table_as_column.replace(table_as_column.this) 3388 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3389 3390 table.replace(unnest) 3391 3392 refs.add(normalized_table.alias_or_name) 3393 3394 return this 3395 3396 def _parse_query_modifiers( 3397 self, this: t.Optional[exp.Expression] 3398 ) -> t.Optional[exp.Expression]: 3399 if isinstance(this, self.MODIFIABLES): 3400 for join in self._parse_joins(): 3401 this.append("joins", join) 3402 for lateral in iter(self._parse_lateral, None): 3403 this.append("laterals", lateral) 3404 3405 while True: 3406 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3407 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3408 key, expression = parser(self) 3409 3410 if expression: 3411 this.set(key, expression) 3412 if key == "limit": 3413 offset = expression.args.pop("offset", None) 3414 3415 if offset: 3416 offset = exp.Offset(expression=offset) 3417 this.set("offset", offset) 3418 3419 limit_by_expressions = expression.expressions 3420 expression.set("expressions", None) 3421 offset.set("expressions", limit_by_expressions) 3422 continue 3423 break 3424 3425 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3426 this = self._implicit_unnests_to_explicit(this) 3427 3428 return this 3429 3430 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3431 start = self._curr 3432 while self._curr: 3433 self._advance() 3434 3435 end = self._tokens[self._index - 1] 3436 return exp.Hint(expressions=[self._find_sql(start, end)]) 3437 3438 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3439 return self._parse_function_call() 3440 3441 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3442 start_index = self._index 3443 should_fallback_to_string = False 3444 3445 hints = [] 3446 try: 3447 for hint in iter( 3448 lambda: self._parse_csv( 3449 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3450 ), 3451 [], 3452 ): 3453 hints.extend(hint) 3454 except ParseError: 3455 should_fallback_to_string = True 3456 3457 if should_fallback_to_string or self._curr: 3458 self._retreat(start_index) 
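# Hedged illustration (not part of the original source): hint bodies are
# normally parsed as function calls or vars by the loop above; anything that
# loop cannot handle is rewound here and preserved verbatim as a single string
# expression by the fallback below, so exotic hint syntax still round-trips.
# For instance (assumed Spark behavior):
#
#   import sqlglot
#   sql = "SELECT /*+ BROADCAST(t) */ * FROM t"
#   print(sqlglot.transpile(sql, read="spark")[0])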
3459 return self._parse_hint_fallback_to_string() 3460 3461 return self.expression(exp.Hint, expressions=hints) 3462 3463 def _parse_hint(self) -> t.Optional[exp.Hint]: 3464 if self._match(TokenType.HINT) and self._prev_comments: 3465 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3466 3467 return None 3468 3469 def _parse_into(self) -> t.Optional[exp.Into]: 3470 if not self._match(TokenType.INTO): 3471 return None 3472 3473 temp = self._match(TokenType.TEMPORARY) 3474 unlogged = self._match_text_seq("UNLOGGED") 3475 self._match(TokenType.TABLE) 3476 3477 return self.expression( 3478 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3479 ) 3480 3481 def _parse_from( 3482 self, joins: bool = False, skip_from_token: bool = False 3483 ) -> t.Optional[exp.From]: 3484 if not skip_from_token and not self._match(TokenType.FROM): 3485 return None 3486 3487 return self.expression( 3488 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3489 ) 3490 3491 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3492 return self.expression( 3493 exp.MatchRecognizeMeasure, 3494 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3495 this=self._parse_expression(), 3496 ) 3497 3498 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3499 if not self._match(TokenType.MATCH_RECOGNIZE): 3500 return None 3501 3502 self._match_l_paren() 3503 3504 partition = self._parse_partition_by() 3505 order = self._parse_order() 3506 3507 measures = ( 3508 self._parse_csv(self._parse_match_recognize_measure) 3509 if self._match_text_seq("MEASURES") 3510 else None 3511 ) 3512 3513 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3514 rows = exp.var("ONE ROW PER MATCH") 3515 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3516 text = "ALL ROWS PER MATCH" 3517 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3518 text += " SHOW EMPTY MATCHES" 3519 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3520 text += " OMIT EMPTY MATCHES" 3521 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3522 text += " WITH UNMATCHED ROWS" 3523 rows = exp.var(text) 3524 else: 3525 rows = None 3526 3527 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3528 text = "AFTER MATCH SKIP" 3529 if self._match_text_seq("PAST", "LAST", "ROW"): 3530 text += " PAST LAST ROW" 3531 elif self._match_text_seq("TO", "NEXT", "ROW"): 3532 text += " TO NEXT ROW" 3533 elif self._match_text_seq("TO", "FIRST"): 3534 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3535 elif self._match_text_seq("TO", "LAST"): 3536 text += f" TO LAST {self._advance_any().text}" # type: ignore 3537 after = exp.var(text) 3538 else: 3539 after = None 3540 3541 if self._match_text_seq("PATTERN"): 3542 self._match_l_paren() 3543 3544 if not self._curr: 3545 self.raise_error("Expecting )", self._curr) 3546 3547 paren = 1 3548 start = self._curr 3549 3550 while self._curr and paren > 0: 3551 if self._curr.token_type == TokenType.L_PAREN: 3552 paren += 1 3553 if self._curr.token_type == TokenType.R_PAREN: 3554 paren -= 1 3555 3556 end = self._prev 3557 self._advance() 3558 3559 if paren > 0: 3560 self.raise_error("Expecting )", self._curr) 3561 3562 pattern = exp.var(self._find_sql(start, end)) 3563 else: 3564 pattern = None 3565 3566 define = ( 3567 self._parse_csv(self._parse_name_as_expression) 3568 if self._match_text_seq("DEFINE") 3569 else None 3570 ) 3571 3572 self._match_r_paren() 3573 
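# The fully parsed clause round-trips (hedged sketch, not part of the original
# source; Snowflake is one dialect exposing MATCH_RECOGNIZE):
#
#   import sqlglot
#   sql = """
#       SELECT * FROM t MATCH_RECOGNIZE (
#           PARTITION BY a ORDER BY b
#           PATTERN (x+)
#           DEFINE x AS price > 0
#       )
#   """
#   print(sqlglot.transpile(sql, read="snowflake")[0])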
3574 return self.expression( 3575 exp.MatchRecognize, 3576 partition_by=partition, 3577 order=order, 3578 measures=measures, 3579 rows=rows, 3580 after=after, 3581 pattern=pattern, 3582 define=define, 3583 alias=self._parse_table_alias(), 3584 ) 3585 3586 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3587 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3588 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3589 cross_apply = False 3590 3591 if cross_apply is not None: 3592 this = self._parse_select(table=True) 3593 view = None 3594 outer = None 3595 elif self._match(TokenType.LATERAL): 3596 this = self._parse_select(table=True) 3597 view = self._match(TokenType.VIEW) 3598 outer = self._match(TokenType.OUTER) 3599 else: 3600 return None 3601 3602 if not this: 3603 this = ( 3604 self._parse_unnest() 3605 or self._parse_function() 3606 or self._parse_id_var(any_token=False) 3607 ) 3608 3609 while self._match(TokenType.DOT): 3610 this = exp.Dot( 3611 this=this, 3612 expression=self._parse_function() or self._parse_id_var(any_token=False), 3613 ) 3614 3615 ordinality: t.Optional[bool] = None 3616 3617 if view: 3618 table = self._parse_id_var(any_token=False) 3619 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3620 table_alias: t.Optional[exp.TableAlias] = self.expression( 3621 exp.TableAlias, this=table, columns=columns 3622 ) 3623 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3624 # We move the alias from the lateral's child node to the lateral itself 3625 table_alias = this.args["alias"].pop() 3626 else: 3627 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3628 table_alias = self._parse_table_alias() 3629 3630 return self.expression( 3631 exp.Lateral, 3632 this=this, 3633 view=view, 3634 outer=outer, 3635 alias=table_alias, 3636 cross_apply=cross_apply, 3637 ordinality=ordinality, 3638 ) 3639 3640 def _parse_join_parts( 3641 self, 3642 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3643 return ( 3644 self._match_set(self.JOIN_METHODS) and self._prev, 3645 self._match_set(self.JOIN_SIDES) and self._prev, 3646 self._match_set(self.JOIN_KINDS) and self._prev, 3647 ) 3648 3649 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3650 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3651 this = self._parse_column() 3652 if isinstance(this, exp.Column): 3653 return this.this 3654 return this 3655 3656 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3657 3658 def _parse_join( 3659 self, skip_join_token: bool = False, parse_bracket: bool = False 3660 ) -> t.Optional[exp.Join]: 3661 if self._match(TokenType.COMMA): 3662 table = self._try_parse(self._parse_table) 3663 if table: 3664 return self.expression(exp.Join, this=table) 3665 return None 3666 3667 index = self._index 3668 method, side, kind = self._parse_join_parts() 3669 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3670 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3671 3672 if not skip_join_token and not join: 3673 self._retreat(index) 3674 kind = None 3675 method = None 3676 side = None 3677 3678 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3679 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3680 3681 if not skip_join_token and not join and not outer_apply and not cross_apply: 3682 return None 3683 3684 kwargs: 
t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3685 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3686 kwargs["expressions"] = self._parse_csv( 3687 lambda: self._parse_table(parse_bracket=parse_bracket) 3688 ) 3689 3690 if method: 3691 kwargs["method"] = method.text 3692 if side: 3693 kwargs["side"] = side.text 3694 if kind: 3695 kwargs["kind"] = kind.text 3696 if hint: 3697 kwargs["hint"] = hint 3698 3699 if self._match(TokenType.MATCH_CONDITION): 3700 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3701 3702 if self._match(TokenType.ON): 3703 kwargs["on"] = self._parse_assignment() 3704 elif self._match(TokenType.USING): 3705 kwargs["using"] = self._parse_using_identifiers() 3706 elif ( 3707 not (outer_apply or cross_apply) 3708 and not isinstance(kwargs["this"], exp.Unnest) 3709 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3710 ): 3711 index = self._index 3712 joins: t.Optional[list] = list(self._parse_joins()) 3713 3714 if joins and self._match(TokenType.ON): 3715 kwargs["on"] = self._parse_assignment() 3716 elif joins and self._match(TokenType.USING): 3717 kwargs["using"] = self._parse_using_identifiers() 3718 else: 3719 joins = None 3720 self._retreat(index) 3721 3722 kwargs["this"].set("joins", joins if joins else None) 3723 3724 comments = [c for token in (method, side, kind) if token for c in token.comments] 3725 return self.expression(exp.Join, comments=comments, **kwargs) 3726 3727 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3728 this = self._parse_assignment() 3729 3730 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3731 return this 3732 3733 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3734 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3735 3736 return this 3737 3738 def _parse_index_params(self) -> exp.IndexParameters: 3739 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3740 3741 if self._match(TokenType.L_PAREN, advance=False): 3742 columns = self._parse_wrapped_csv(self._parse_with_operator) 3743 else: 3744 columns = None 3745 3746 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3747 partition_by = self._parse_partition_by() 3748 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3749 tablespace = ( 3750 self._parse_var(any_token=True) 3751 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3752 else None 3753 ) 3754 where = self._parse_where() 3755 3756 on = self._parse_field() if self._match(TokenType.ON) else None 3757 3758 return self.expression( 3759 exp.IndexParameters, 3760 using=using, 3761 columns=columns, 3762 include=include, 3763 partition_by=partition_by, 3764 where=where, 3765 with_storage=with_storage, 3766 tablespace=tablespace, 3767 on=on, 3768 ) 3769 3770 def _parse_index( 3771 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3772 ) -> t.Optional[exp.Index]: 3773 if index or anonymous: 3774 unique = None 3775 primary = None 3776 amp = None 3777 3778 self._match(TokenType.ON) 3779 self._match(TokenType.TABLE) # hive 3780 table = self._parse_table_parts(schema=True) 3781 else: 3782 unique = self._match(TokenType.UNIQUE) 3783 primary = self._match_text_seq("PRIMARY") 3784 amp = self._match_text_seq("AMP") 3785 3786 if not self._match(TokenType.INDEX): 3787 return None 3788 3789 index = self._parse_id_var() 3790 table = None 
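# Both the named and the anonymous branch converge on the shared parameter
# parsing below (hedged illustration, not part of the original source; the
# anonymous form is the Postgres CREATE INDEX IF NOT EXISTS ON t(c) shape
# noted in _parse_create):
#
#   import sqlglot
#   sqlglot.parse_one("CREATE INDEX IF NOT EXISTS ON t(c)", read="postgres")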
        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_set((TokenType.INDEX, TokenType.KEY))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        table = self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
        )

        changes = self._parse_changes()
        if changes:
            table.set("changes", changes)

        at_before = self._parse_historical_data()
        if at_before:
            table.set("when", at_before)

        pivots = self._parse_pivots()
        if pivots:
            table.set("pivots", pivots)

        return table

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv(
            self._parse_table
        )
        rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or rows_from
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]:
        # https://docs.snowflake.com/en/sql-reference/constructs/at-before
        index = self._index
        historical_data = None
        if self._match_texts(self.HISTORICAL_DATA_PREFIX):
            this = self._prev.text.upper()
            kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.HISTORICAL_DATA_KIND)
                and self._prev.text.upper()
            )
            expression = self._match(TokenType.FARROW) and self._parse_bitwise()

            if expression:
                self._match_r_paren()
                historical_data = self.expression(
                    exp.HistoricalData, this=this, kind=kind, expression=expression
                )
            else:
                self._retreat(index)

        return historical_data

    def _parse_changes(self) -> t.Optional[exp.Changes]:
        if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"):
            return None

        information = self._parse_var(any_token=True)
        self._match_r_paren()

        return self.expression(
            exp.Changes,
            information=information,
            at_before=self._parse_historical_data(),
            end=self._parse_historical_data(),
        )

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not (
            # ClickHouse's `FORMAT Values` is equivalent to `VALUES`
            self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES")
        ):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
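            # Illustrative examples (not in the original source): this branch handles a single
            # sample argument, e.g. "TABLESAMPLE (10)" or "TABLESAMPLE (1000 ROWS)"; whether a
            # bare number means percent or rows is decided by the dialect checks below.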
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

            if self._match_text_seq("BUCKET"):
                bucket_numerator = self._parse_number()
                self._match_text_seq("OUT", "OF")
                bucket_denominator = self._parse_number()
                self._match(TokenType.ON)
                bucket_field = self._parse_field()
            elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
                percent = num
            elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
                size = num
            else:
                percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        if not method and self.DEFAULT_SAMPLING_METHOD:
            method = exp.var(self.DEFAULT_SAMPLING_METHOD)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        return iter(self._parse_join, None)

    def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]:
        if not self._match(TokenType.INTO):
            return None

        return self.expression(
            exp.UnpivotColumns,
            this=self._match_text_seq("NAME") and self._parse_column(),
            expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column),
        )

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match(TokenType.IN):
                # PIVOT ... ON col IN (row_val1, row_val2)
                return self._parse_in(this)
            if self._match(TokenType.ALIAS, advance=False):
                # UNPIVOT ... ON (col1, col2, col3) AS row_val
                return self._parse_alias(this)

            return this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        into = self._parse_unpivot_columns()
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()

        return self.expression(
            exp.Pivot,
            this=this,
            expressions=expressions,
            using=using,
            group=group,
            unpivot=is_unpivot,
            into=into,
        )

    def _parse_pivot_in(self) -> exp.In:
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_select_or_expression()

            self._match(TokenType.ALIAS)
            alias = self._parse_bitwise()
            if alias:
                if isinstance(alias, exp.Column) and not alias.db:
                    alias = alias.this
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        if self._match(TokenType.ANY):
            exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order()))
        else:
            exprs = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=exprs)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        fields = []
        while True:
            field = self._try_parse(self._parse_pivot_in)
            if not field:
                break
            fields.append(field)

        default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
            self._parse_bitwise
        )

        group = self._parse_group()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            fields=fields,
            unpivot=unpivot,
            include_nulls=include_nulls,
            default_on_null=default_on_null,
            group=group,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            all_fields = []
            for pivot_field in pivot.fields:
                pivot_field_expressions = pivot_field.expressions

                # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case.
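                # (Illustrative, not in the original source: "PIVOT(... FOR col IN (ANY ORDER BY col))"
                # leaves the output column set unknown until runtime, so no names are generated.)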
                if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny):
                    continue

                all_fields.append(
                    [
                        fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                        for fld in pivot_field_expressions
                    ]
                )

            if all_fields:
                if names:
                    all_fields.append(names)

                # Generate all possible combinations of the pivot columns
                # e.g. PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US'))
                # generates the product between [[2000, 2010], ['NL', 'US'], ['total']]
                for fld_parts_tuple in itertools.product(*all_fields):
                    fld_parts = list(fld_parts_tuple)

                    if names and self.PREFIXED_PIVOT_COLUMNS:
                        # Move the "name" to the front of the list
                        fld_parts.insert(0, fld_parts.pop(-1))

                    columns.append(exp.to_identifier("_".join(fld_parts)))

                pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations if agg.alias]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        while True:
            index = self._index

            elements["expressions"].extend(
                self._parse_csv(
                    lambda: None
                    if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False)
                    else self._parse_assignment()
                )
            )

            before_with_index = self._index
            with_prefix = self._match(TokenType.WITH)

            if self._match(TokenType.ROLLUP):
                elements["rollup"].append(
                    self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix)
                )
            elif self._match(TokenType.CUBE):
                elements["cube"].append(
                    self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix)
                )
            elif self._match(TokenType.GROUPING_SETS):
                elements["grouping_sets"].append(
                    self.expression(
                        exp.GroupingSets,
                        expressions=self._parse_wrapped_csv(self._parse_grouping_set),
                    )
                )
            elif self._match_text_seq("TOTALS"):
                elements["totals"] = True  # type: ignore

            if before_with_index <= self._index <= before_with_index + 1:
                self._retreat(before_with_index)
                break

            if index == self._index:
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E:
        return self.expression(
            kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column)
        )

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_assignment())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_assignment())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_assignment()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_assignment()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_assignment()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> t.Optional[exp.Expression]:
        this = self._parse_id_var(any_token=True)
        if self._match(TokenType.ALIAS):
            this = self.expression(exp.Alias, alias=this, this=self._parse_assignment())
        return this

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        this = parse_method() if parse_method else self._parse_assignment()
        if not this:
            return None

        if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL:
            this = exp.var("ALL")

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
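            # Illustrative (not in the original source): e.g. with NULL_ORDERING set to
            # "nulls_are_small", a plain ascending "ORDER BY x" sorts NULLs first, matching
            # "ORDER BY x NULLS FIRST" once transpiled.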
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                    "interpolate": self._parse_interpolate(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit_options(self) -> exp.LimitOptions:
        percent = self._match(TokenType.PERCENT)
        rows = self._match_set((TokenType.ROW, TokenType.ROWS))
        self._match_text_seq("ONLY")
        with_ties = self._match_text_seq("WITH", "TIES")
        return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties)

    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()

                limit_options = self._parse_limit_options()
            else:
                limit_options = None
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                limit_options=limit_options,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                limit_options=self._parse_limit_options(),
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _can_parse_limit_or_offset(self) -> bool:
        if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False):
            return False

        index = self._index
        result = bool(
            self._try_parse(self._parse_limit, retreat=True)
            or self._try_parse(self._parse_offset, retreat=True)
        )
        self._retreat(index)
        return result

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break
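            # Illustrative (not in the original source): an optional "OF t1, t2" narrows the
            # lock to specific tables, so "FOR UPDATE OF orders NOWAIT" fills both args below.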
            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def parse_set_operation(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        start = self._index
        _, side_token, kind_token = self._parse_join_parts()

        side = side_token.text if side_token else None
        kind = kind_token.text if kind_token else None

        if not self._match_set(self.SET_OPERATIONS):
            self._retreat(start)
            return None

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            operation: t.Type[exp.SetOperation] = exp.Union
        elif token_type == TokenType.EXCEPT:
            operation = exp.Except
        else:
            operation = exp.Intersect

        comments = self._prev.comments

        if self._match(TokenType.DISTINCT):
            distinct: t.Optional[bool] = True
        elif self._match(TokenType.ALL):
            distinct = False
        else:
            distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
            if distinct is None:
                self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

        by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq(
            "STRICT", "CORRESPONDING"
        )
        if self._match_text_seq("CORRESPONDING"):
            by_name = True
            if not side and not kind:
                kind = "INNER"

        on_column_list = None
        if by_name and self._match_texts(("ON", "BY")):
            on_column_list = self._parse_wrapped_csv(self._parse_column)

        expression = self._parse_select(nested=True, parse_set_operation=False)

        return self.expression(
            operation,
            comments=comments,
            this=this,
            distinct=distinct,
            by_name=by_name,
            expression=expression,
            side=side,
            kind=kind,
            on=on_column_list,
        )

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        while True:
            setop = self.parse_set_operation(this)
            if not setop:
                break
            this = setop

        if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP:
            expression = this.expression

            if expression:
                for arg in self.SET_OP_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_assignment())

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        this = self._parse_disjunction()
        if not this and self._next and self._next.token_type in self.ASSIGNMENT:
            # This allows us to parse <non-identifier token> := <expr>
            this = exp.column(
                t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text)
            )

        while self._match_set(self.ASSIGNMENT):
            if isinstance(this, exp.Column) and len(this.parts) == 1:
                this = this.this

            this = self.expression(
                self.ASSIGNMENT[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_assignment(),
            )
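        # Illustrative (not in the original source): the recursive call above makes ":="
        # right-associative, so "a := b := 1" parses as "a := (b := 1)".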
        return this

    def _parse_disjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION)

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self._negate_range(this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not this:
            return this

        return self.expression(exp.Not, this=this)

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        if self._match(TokenType.JSON):
            kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper()

            if self._match_text_seq("WITH"):
                _with = True
            elif self._match_text_seq("WITHOUT"):
                _with = False
            else:
                _with = None

            unique = self._match(TokenType.UNIQUE)
            self._match_text_seq("KEYS")
            expression: t.Optional[exp.Expression] = self.expression(
                exp.JSON, **{"this": kind, "with": _with, "unique": unique}
            )
        else:
            expression = self._parse_primary() or self._parse_null()
            if not expression:
                self._retreat(index)
                return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_column())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.to_py())
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if parts and unit:
                # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                unit = None
                self._retreat(self._index - 1)

            if len(parts) == 1:
                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var, this=parts[0][1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        interval = self.expression(exp.Interval, this=this, unit=unit)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(
                    exp.Coalesce, this=this, expressions=ensure_list(self._parse_term())
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        this = self._parse_factor()

        while self._match_set(self.TERM):
            klass = self.TERM[self._prev.token_type]
            comments = self._prev_comments
            expression = self._parse_factor()

            this = self.expression(klass, this=this, comments=comments, expression=expression)

        if isinstance(this, exp.Collate):
            expr = this.expression

            # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise
            # fallback to Identifier / Var
            if isinstance(expr, exp.Column) and len(expr.parts) == 1:
                ident = expr.this
                if isinstance(ident, exp.Identifier):
                    this.set("expression", ident if ident.quoted else exp.var(ident.name))

        return this

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

        if isinstance(this, exp.Div):
            this.args["typed"] = self.dialect.TYPED_DIVISION
            this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
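        # Illustrative (not in the original source): for input like "DATE '2020-01-01'" the
        # call above consumes the type, and the literal that follows is folded into a CAST
        # by the logic below.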
        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

            self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier, exp.Identifier):
                tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect))

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    this = exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
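            # Illustrative (not in the original source): pseudo-types are kept verbatim as
            # their upper-cased name rather than being mapped to a concrete DataType.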
5116 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5117 5118 if type_token == TokenType.OBJECT_IDENTIFIER: 5119 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5120 5121 # https://materialize.com/docs/sql/types/map/ 5122 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5123 key_type = self._parse_types( 5124 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5125 ) 5126 if not self._match(TokenType.FARROW): 5127 self._retreat(index) 5128 return None 5129 5130 value_type = self._parse_types( 5131 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5132 ) 5133 if not self._match(TokenType.R_BRACKET): 5134 self._retreat(index) 5135 return None 5136 5137 return exp.DataType( 5138 this=exp.DataType.Type.MAP, 5139 expressions=[key_type, value_type], 5140 nested=True, 5141 prefix=prefix, 5142 ) 5143 5144 nested = type_token in self.NESTED_TYPE_TOKENS 5145 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5146 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5147 expressions = None 5148 maybe_func = False 5149 5150 if self._match(TokenType.L_PAREN): 5151 if is_struct: 5152 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5153 elif nested: 5154 expressions = self._parse_csv( 5155 lambda: self._parse_types( 5156 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5157 ) 5158 ) 5159 if type_token == TokenType.NULLABLE and len(expressions) == 1: 5160 this = expressions[0] 5161 this.set("nullable", True) 5162 self._match_r_paren() 5163 return this 5164 elif type_token in self.ENUM_TYPE_TOKENS: 5165 expressions = self._parse_csv(self._parse_equality) 5166 elif is_aggregate: 5167 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5168 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5169 ) 5170 if not func_or_ident: 5171 return None 5172 expressions = [func_or_ident] 5173 if self._match(TokenType.COMMA): 5174 expressions.extend( 5175 self._parse_csv( 5176 lambda: self._parse_types( 5177 check_func=check_func, 5178 schema=schema, 5179 allow_identifiers=allow_identifiers, 5180 ) 5181 ) 5182 ) 5183 else: 5184 expressions = self._parse_csv(self._parse_type_size) 5185 5186 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5187 if type_token == TokenType.VECTOR and len(expressions) == 2: 5188 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 5189 5190 if not expressions or not self._match(TokenType.R_PAREN): 5191 self._retreat(index) 5192 return None 5193 5194 maybe_func = True 5195 5196 values: t.Optional[t.List[exp.Expression]] = None 5197 5198 if nested and self._match(TokenType.LT): 5199 if is_struct: 5200 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5201 else: 5202 expressions = self._parse_csv( 5203 lambda: self._parse_types( 5204 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5205 ) 5206 ) 5207 5208 if not self._match(TokenType.GT): 5209 self.raise_error("Expecting >") 5210 5211 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5212 values = self._parse_csv(self._parse_assignment) 5213 if not values and is_struct: 5214 values = None 5215 self._retreat(self._index - 1) 5216 else: 5217 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5218 5219 if type_token in self.TIMESTAMPS: 5220 if self._match_text_seq("WITH", "TIME", "ZONE"): 5221 maybe_func = False 5222 tz_type = ( 
5223 exp.DataType.Type.TIMETZ 5224 if type_token in self.TIMES 5225 else exp.DataType.Type.TIMESTAMPTZ 5226 ) 5227 this = exp.DataType(this=tz_type, expressions=expressions) 5228 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5229 maybe_func = False 5230 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5231 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5232 maybe_func = False 5233 elif type_token == TokenType.INTERVAL: 5234 unit = self._parse_var(upper=True) 5235 if unit: 5236 if self._match_text_seq("TO"): 5237 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5238 5239 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5240 else: 5241 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5242 5243 if maybe_func and check_func: 5244 index2 = self._index 5245 peek = self._parse_string() 5246 5247 if not peek: 5248 self._retreat(index) 5249 return None 5250 5251 self._retreat(index2) 5252 5253 if not this: 5254 if self._match_text_seq("UNSIGNED"): 5255 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5256 if not unsigned_type_token: 5257 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5258 5259 type_token = unsigned_type_token or type_token 5260 5261 this = exp.DataType( 5262 this=exp.DataType.Type[type_token.value], 5263 expressions=expressions, 5264 nested=nested, 5265 prefix=prefix, 5266 ) 5267 5268 # Empty arrays/structs are allowed 5269 if values is not None: 5270 cls = exp.Struct if is_struct else exp.Array 5271 this = exp.cast(cls(expressions=values), this, copy=False) 5272 5273 elif expressions: 5274 this.set("expressions", expressions) 5275 5276 # https://materialize.com/docs/sql/types/list/#type-name 5277 while self._match(TokenType.LIST): 5278 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5279 5280 index = self._index 5281 5282 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5283 matched_array = self._match(TokenType.ARRAY) 5284 5285 while self._curr: 5286 datatype_token = self._prev.token_type 5287 matched_l_bracket = self._match(TokenType.L_BRACKET) 5288 5289 if (not matched_l_bracket and not matched_array) or ( 5290 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5291 ): 5292 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5293 # not to be confused with the fixed size array parsing 5294 break 5295 5296 matched_array = False 5297 values = self._parse_csv(self._parse_assignment) or None 5298 if ( 5299 values 5300 and not schema 5301 and ( 5302 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5303 ) 5304 ): 5305 # Retreating here means that we should not parse the following values as part of the data type, e.g. 
in DuckDB 5306 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5307 self._retreat(index) 5308 break 5309 5310 this = exp.DataType( 5311 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5312 ) 5313 self._match(TokenType.R_BRACKET) 5314 5315 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5316 converter = self.TYPE_CONVERTERS.get(this.this) 5317 if converter: 5318 this = converter(t.cast(exp.DataType, this)) 5319 5320 return this 5321 5322 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5323 index = self._index 5324 5325 if ( 5326 self._curr 5327 and self._next 5328 and self._curr.token_type in self.TYPE_TOKENS 5329 and self._next.token_type in self.TYPE_TOKENS 5330 ): 5331 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5332 # type token. Without this, the list will be parsed as a type and we'll eventually crash 5333 this = self._parse_id_var() 5334 else: 5335 this = ( 5336 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5337 or self._parse_id_var() 5338 ) 5339 5340 self._match(TokenType.COLON) 5341 5342 if ( 5343 type_required 5344 and not isinstance(this, exp.DataType) 5345 and not self._match_set(self.TYPE_TOKENS, advance=False) 5346 ): 5347 self._retreat(index) 5348 return self._parse_types() 5349 5350 return self._parse_column_def(this) 5351 5352 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5353 if not self._match_text_seq("AT", "TIME", "ZONE"): 5354 return this 5355 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5356 5357 def _parse_column(self) -> t.Optional[exp.Expression]: 5358 this = self._parse_column_reference() 5359 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5360 5361 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5362 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5363 5364 return column 5365 5366 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5367 this = self._parse_field() 5368 if ( 5369 not this 5370 and self._match(TokenType.VALUES, advance=False) 5371 and self.VALUES_FOLLOWED_BY_PAREN 5372 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5373 ): 5374 this = self._parse_id_var() 5375 5376 if isinstance(this, exp.Identifier): 5377 # We bubble up comments from the Identifier to the Column 5378 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5379 5380 return this 5381 5382 def _parse_colon_as_variant_extract( 5383 self, this: t.Optional[exp.Expression] 5384 ) -> t.Optional[exp.Expression]: 5385 casts = [] 5386 json_path = [] 5387 escape = None 5388 5389 while self._match(TokenType.COLON): 5390 start_index = self._index 5391 5392 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5393 path = self._parse_column_ops( 5394 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5395 ) 5396 5397 # The cast :: operator has a lower precedence than the extraction operator :, so 5398 # we rearrange the AST appropriately to avoid casting the JSON path 5399 while isinstance(path, exp.Cast): 5400 casts.append(path.to) 5401 path = path.this 5402 5403 if casts: 5404 dcolon_offset = next( 5405 i 5406 for i, t in enumerate(self._tokens[start_index:]) 5407 if t.token_type == TokenType.DCOLON 
5408 ) 5409 end_token = self._tokens[start_index + dcolon_offset - 1] 5410 else: 5411 end_token = self._prev 5412 5413 if path: 5414 # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as 5415 # it'll roundtrip to a string literal in GET_PATH 5416 if isinstance(path, exp.Identifier) and path.quoted: 5417 escape = True 5418 5419 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5420 5421 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5422 # Databricks transforms it back to the colon/dot notation 5423 if json_path: 5424 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5425 5426 if json_path_expr: 5427 json_path_expr.set("escape", escape) 5428 5429 this = self.expression( 5430 exp.JSONExtract, 5431 this=this, 5432 expression=json_path_expr, 5433 variant_extract=True, 5434 ) 5435 5436 while casts: 5437 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5438 5439 return this 5440 5441 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5442 return self._parse_types() 5443 5444 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5445 this = self._parse_bracket(this) 5446 5447 while self._match_set(self.COLUMN_OPERATORS): 5448 op_token = self._prev.token_type 5449 op = self.COLUMN_OPERATORS.get(op_token) 5450 5451 if op_token in (TokenType.DCOLON, TokenType.DOTCOLON): 5452 field = self._parse_dcolon() 5453 if not field: 5454 self.raise_error("Expected type") 5455 elif op and self._curr: 5456 field = self._parse_column_reference() or self._parse_bracket() 5457 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5458 field = self._parse_column_ops(field) 5459 else: 5460 field = self._parse_field(any_token=True, anonymous_func=True) 5461 5462 if isinstance(field, (exp.Func, exp.Window)) and this: 5463 # BQ & snowflake allow function calls like x.y.count(...), SAFE.SUBSTR(...) 
etc 5464 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5465 this = exp.replace_tree( 5466 this, 5467 lambda n: ( 5468 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5469 if n.table 5470 else n.this 5471 ) 5472 if isinstance(n, exp.Column) 5473 else n, 5474 ) 5475 5476 if op: 5477 this = op(self, this, field) 5478 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5479 this = self.expression( 5480 exp.Column, 5481 comments=this.comments, 5482 this=field, 5483 table=this.this, 5484 db=this.args.get("table"), 5485 catalog=this.args.get("db"), 5486 ) 5487 elif isinstance(field, exp.Window): 5488 # Move the exp.Dot's to the window's function 5489 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5490 field.set("this", window_func) 5491 this = field 5492 else: 5493 this = self.expression(exp.Dot, this=this, expression=field) 5494 5495 if field and field.comments: 5496 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5497 5498 this = self._parse_bracket(this) 5499 5500 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5501 5502 def _parse_primary(self) -> t.Optional[exp.Expression]: 5503 if self._match_set(self.PRIMARY_PARSERS): 5504 token_type = self._prev.token_type 5505 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5506 5507 if token_type == TokenType.STRING: 5508 expressions = [primary] 5509 while self._match(TokenType.STRING): 5510 expressions.append(exp.Literal.string(self._prev.text)) 5511 5512 if len(expressions) > 1: 5513 return self.expression(exp.Concat, expressions=expressions) 5514 5515 return primary 5516 5517 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5518 return exp.Literal.number(f"0.{self._prev.text}") 5519 5520 if self._match(TokenType.L_PAREN): 5521 comments = self._prev_comments 5522 query = self._parse_select() 5523 5524 if query: 5525 expressions = [query] 5526 else: 5527 expressions = self._parse_expressions() 5528 5529 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5530 5531 if not this and self._match(TokenType.R_PAREN, advance=False): 5532 this = self.expression(exp.Tuple) 5533 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5534 this = self._parse_subquery(this=this, parse_alias=False) 5535 elif isinstance(this, exp.Subquery): 5536 this = self._parse_subquery( 5537 this=self._parse_set_operations(this), parse_alias=False 5538 ) 5539 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5540 this = self.expression(exp.Tuple, expressions=expressions) 5541 else: 5542 this = self.expression(exp.Paren, this=this) 5543 5544 if this: 5545 this.add_comments(comments) 5546 5547 self._match_r_paren(expression=this) 5548 return this 5549 5550 return None 5551 5552 def _parse_field( 5553 self, 5554 any_token: bool = False, 5555 tokens: t.Optional[t.Collection[TokenType]] = None, 5556 anonymous_func: bool = False, 5557 ) -> t.Optional[exp.Expression]: 5558 if anonymous_func: 5559 field = ( 5560 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5561 or self._parse_primary() 5562 ) 5563 else: 5564 field = self._parse_primary() or self._parse_function( 5565 anonymous=anonymous_func, any_token=any_token 5566 ) 5567 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5568 5569 def _parse_function( 5570 self, 5571 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5572 anonymous: bool = False, 5573 optional_parens: 
bool = True, 5574 any_token: bool = False, 5575 ) -> t.Optional[exp.Expression]: 5576 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5577 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5578 fn_syntax = False 5579 if ( 5580 self._match(TokenType.L_BRACE, advance=False) 5581 and self._next 5582 and self._next.text.upper() == "FN" 5583 ): 5584 self._advance(2) 5585 fn_syntax = True 5586 5587 func = self._parse_function_call( 5588 functions=functions, 5589 anonymous=anonymous, 5590 optional_parens=optional_parens, 5591 any_token=any_token, 5592 ) 5593 5594 if fn_syntax: 5595 self._match(TokenType.R_BRACE) 5596 5597 return func 5598 5599 def _parse_function_call( 5600 self, 5601 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5602 anonymous: bool = False, 5603 optional_parens: bool = True, 5604 any_token: bool = False, 5605 ) -> t.Optional[exp.Expression]: 5606 if not self._curr: 5607 return None 5608 5609 comments = self._curr.comments 5610 token_type = self._curr.token_type 5611 this = self._curr.text 5612 upper = this.upper() 5613 5614 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5615 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5616 self._advance() 5617 return self._parse_window(parser(self)) 5618 5619 if not self._next or self._next.token_type != TokenType.L_PAREN: 5620 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5621 self._advance() 5622 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5623 5624 return None 5625 5626 if any_token: 5627 if token_type in self.RESERVED_TOKENS: 5628 return None 5629 elif token_type not in self.FUNC_TOKENS: 5630 return None 5631 5632 self._advance(2) 5633 5634 parser = self.FUNCTION_PARSERS.get(upper) 5635 if parser and not anonymous: 5636 this = parser(self) 5637 else: 5638 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5639 5640 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5641 this = self.expression( 5642 subquery_predicate, comments=comments, this=self._parse_select() 5643 ) 5644 self._match_r_paren() 5645 return this 5646 5647 if functions is None: 5648 functions = self.FUNCTIONS 5649 5650 function = functions.get(upper) 5651 known_function = function and not anonymous 5652 5653 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5654 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5655 5656 post_func_comments = self._curr and self._curr.comments 5657 if known_function and post_func_comments: 5658 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5659 # call we'll construct it as exp.Anonymous, even if it's "known" 5660 if any( 5661 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5662 for comment in post_func_comments 5663 ): 5664 known_function = False 5665 5666 if alias and known_function: 5667 args = self._kv_to_prop_eq(args) 5668 5669 if known_function: 5670 func_builder = t.cast(t.Callable, function) 5671 5672 if "dialect" in func_builder.__code__.co_varnames: 5673 func = func_builder(args, dialect=self.dialect) 5674 else: 5675 func = func_builder(args) 5676 5677 func = self.validate_expression(func, args) 5678 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5679 func.meta["name"] = this 5680 5681 this = func 5682 else: 5683 if token_type == TokenType.IDENTIFIER: 5684 this = exp.Identifier(this=this, quoted=True) 5685 this = self.expression(exp.Anonymous, this=this, expressions=args) 5686 
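# Illustrative aside (not part of the module): a name present in FUNCTIONS is built into
# its typed node, while an unrecognized name falls back to exp.Anonymous. Assuming the
# public sqlglot API:
#
#     import sqlglot
#     from sqlglot import exp
#
#     assert isinstance(sqlglot.parse_one("SELECT SUM(x)").find(exp.Sum), exp.Sum)
#     anon = sqlglot.parse_one("SELECT MY_UDF(x)").find(exp.Anonymous)
#     assert anon is not None and anon.name == "MY_UDF"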
5687 if isinstance(this, exp.Expression): 5688 this.add_comments(comments) 5689 5690 self._match_r_paren(this) 5691 return self._parse_window(this) 5692 5693 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5694 return expression 5695 5696 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5697 transformed = [] 5698 5699 for index, e in enumerate(expressions): 5700 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5701 if isinstance(e, exp.Alias): 5702 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5703 5704 if not isinstance(e, exp.PropertyEQ): 5705 e = self.expression( 5706 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5707 ) 5708 5709 if isinstance(e.this, exp.Column): 5710 e.this.replace(e.this.this) 5711 else: 5712 e = self._to_prop_eq(e, index) 5713 5714 transformed.append(e) 5715 5716 return transformed 5717 5718 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5719 return self._parse_statement() 5720 5721 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5722 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5723 5724 def _parse_user_defined_function( 5725 self, kind: t.Optional[TokenType] = None 5726 ) -> t.Optional[exp.Expression]: 5727 this = self._parse_table_parts(schema=True) 5728 5729 if not self._match(TokenType.L_PAREN): 5730 return this 5731 5732 expressions = self._parse_csv(self._parse_function_parameter) 5733 self._match_r_paren() 5734 return self.expression( 5735 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5736 ) 5737 5738 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5739 literal = self._parse_primary() 5740 if literal: 5741 return self.expression(exp.Introducer, this=token.text, expression=literal) 5742 5743 return self.expression(exp.Identifier, this=token.text) 5744 5745 def _parse_session_parameter(self) -> exp.SessionParameter: 5746 kind = None 5747 this = self._parse_id_var() or self._parse_primary() 5748 5749 if this and self._match(TokenType.DOT): 5750 kind = this.name 5751 this = self._parse_var() or self._parse_primary() 5752 5753 return self.expression(exp.SessionParameter, this=this, kind=kind) 5754 5755 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5756 return self._parse_id_var() 5757 5758 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5759 index = self._index 5760 5761 if self._match(TokenType.L_PAREN): 5762 expressions = t.cast( 5763 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5764 ) 5765 5766 if not self._match(TokenType.R_PAREN): 5767 self._retreat(index) 5768 else: 5769 expressions = [self._parse_lambda_arg()] 5770 5771 if self._match_set(self.LAMBDAS): 5772 return self.LAMBDAS[self._prev.token_type](self, expressions) 5773 5774 self._retreat(index) 5775 5776 this: t.Optional[exp.Expression] 5777 5778 if self._match(TokenType.DISTINCT): 5779 this = self.expression( 5780 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5781 ) 5782 else: 5783 this = self._parse_select_or_expression(alias=alias) 5784 5785 return self._parse_limit( 5786 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5787 ) 5788 5789 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5790 index = self._index 5791 if not 
self._match(TokenType.L_PAREN): 5792 return this 5793 5794 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 5795 # expr can be of both types 5796 if self._match_set(self.SELECT_START_TOKENS): 5797 self._retreat(index) 5798 return this 5799 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5800 self._match_r_paren() 5801 return self.expression(exp.Schema, this=this, expressions=args) 5802 5803 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5804 return self._parse_column_def(self._parse_field(any_token=True)) 5805 5806 def _parse_column_def( 5807 self, this: t.Optional[exp.Expression], computed_column: bool = True 5808 ) -> t.Optional[exp.Expression]: 5809 # column defs are not really columns, they're identifiers 5810 if isinstance(this, exp.Column): 5811 this = this.this 5812 5813 if not computed_column: 5814 self._match(TokenType.ALIAS) 5815 5816 kind = self._parse_types(schema=True) 5817 5818 if self._match_text_seq("FOR", "ORDINALITY"): 5819 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5820 5821 constraints: t.List[exp.Expression] = [] 5822 5823 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5824 ("ALIAS", "MATERIALIZED") 5825 ): 5826 persisted = self._prev.text.upper() == "MATERIALIZED" 5827 constraint_kind = exp.ComputedColumnConstraint( 5828 this=self._parse_assignment(), 5829 persisted=persisted or self._match_text_seq("PERSISTED"), 5830 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5831 ) 5832 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5833 elif ( 5834 kind 5835 and self._match(TokenType.ALIAS, advance=False) 5836 and ( 5837 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 5838 or (self._next and self._next.token_type == TokenType.L_PAREN) 5839 ) 5840 ): 5841 self._advance() 5842 constraints.append( 5843 self.expression( 5844 exp.ColumnConstraint, 5845 kind=exp.TransformColumnConstraint(this=self._parse_disjunction()), 5846 ) 5847 ) 5848 5849 while True: 5850 constraint = self._parse_column_constraint() 5851 if not constraint: 5852 break 5853 constraints.append(constraint) 5854 5855 if not kind and not constraints: 5856 return this 5857 5858 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5859 5860 def _parse_auto_increment( 5861 self, 5862 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5863 start = None 5864 increment = None 5865 5866 if self._match(TokenType.L_PAREN, advance=False): 5867 args = self._parse_wrapped_csv(self._parse_bitwise) 5868 start = seq_get(args, 0) 5869 increment = seq_get(args, 1) 5870 elif self._match_text_seq("START"): 5871 start = self._parse_bitwise() 5872 self._match_text_seq("INCREMENT") 5873 increment = self._parse_bitwise() 5874 5875 if start and increment: 5876 return exp.GeneratedAsIdentityColumnConstraint( 5877 start=start, increment=increment, this=False 5878 ) 5879 5880 return exp.AutoIncrementColumnConstraint() 5881 5882 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5883 if not self._match_text_seq("REFRESH"): 5884 self._retreat(self._index - 1) 5885 return None 5886 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5887 5888 def _parse_compress(self) -> exp.CompressColumnConstraint: 5889 if self._match(TokenType.L_PAREN, advance=False): 5890 return self.expression( 5891 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5892 
) 5893 5894 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5895 5896 def _parse_generated_as_identity( 5897 self, 5898 ) -> ( 5899 exp.GeneratedAsIdentityColumnConstraint 5900 | exp.ComputedColumnConstraint 5901 | exp.GeneratedAsRowColumnConstraint 5902 ): 5903 if self._match_text_seq("BY", "DEFAULT"): 5904 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5905 this = self.expression( 5906 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5907 ) 5908 else: 5909 self._match_text_seq("ALWAYS") 5910 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5911 5912 self._match(TokenType.ALIAS) 5913 5914 if self._match_text_seq("ROW"): 5915 start = self._match_text_seq("START") 5916 if not start: 5917 self._match(TokenType.END) 5918 hidden = self._match_text_seq("HIDDEN") 5919 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5920 5921 identity = self._match_text_seq("IDENTITY") 5922 5923 if self._match(TokenType.L_PAREN): 5924 if self._match(TokenType.START_WITH): 5925 this.set("start", self._parse_bitwise()) 5926 if self._match_text_seq("INCREMENT", "BY"): 5927 this.set("increment", self._parse_bitwise()) 5928 if self._match_text_seq("MINVALUE"): 5929 this.set("minvalue", self._parse_bitwise()) 5930 if self._match_text_seq("MAXVALUE"): 5931 this.set("maxvalue", self._parse_bitwise()) 5932 5933 if self._match_text_seq("CYCLE"): 5934 this.set("cycle", True) 5935 elif self._match_text_seq("NO", "CYCLE"): 5936 this.set("cycle", False) 5937 5938 if not identity: 5939 this.set("expression", self._parse_range()) 5940 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5941 args = self._parse_csv(self._parse_bitwise) 5942 this.set("start", seq_get(args, 0)) 5943 this.set("increment", seq_get(args, 1)) 5944 5945 self._match_r_paren() 5946 5947 return this 5948 5949 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5950 self._match_text_seq("LENGTH") 5951 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5952 5953 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5954 if self._match_text_seq("NULL"): 5955 return self.expression(exp.NotNullColumnConstraint) 5956 if self._match_text_seq("CASESPECIFIC"): 5957 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5958 if self._match_text_seq("FOR", "REPLICATION"): 5959 return self.expression(exp.NotForReplicationColumnConstraint) 5960 5961 # Unconsume the `NOT` token 5962 self._retreat(self._index - 1) 5963 return None 5964 5965 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5966 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 5967 5968 procedure_option_follows = ( 5969 self._match(TokenType.WITH, advance=False) 5970 and self._next 5971 and self._next.text.upper() in self.PROCEDURE_OPTIONS 5972 ) 5973 5974 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 5975 return self.expression( 5976 exp.ColumnConstraint, 5977 this=this, 5978 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5979 ) 5980 5981 return this 5982 5983 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5984 if not self._match(TokenType.CONSTRAINT): 5985 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5986 5987 return self.expression( 5988 exp.Constraint, 5989 this=self._parse_id_var(), 5990 expressions=self._parse_unnamed_constraints(), 5991 ) 5992 5993 def 
_parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5994 constraints = [] 5995 while True: 5996 constraint = self._parse_unnamed_constraint() or self._parse_function() 5997 if not constraint: 5998 break 5999 constraints.append(constraint) 6000 6001 return constraints 6002 6003 def _parse_unnamed_constraint( 6004 self, constraints: t.Optional[t.Collection[str]] = None 6005 ) -> t.Optional[exp.Expression]: 6006 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6007 constraints or self.CONSTRAINT_PARSERS 6008 ): 6009 return None 6010 6011 constraint = self._prev.text.upper() 6012 if constraint not in self.CONSTRAINT_PARSERS: 6013 self.raise_error(f"No parser found for schema constraint {constraint}.") 6014 6015 return self.CONSTRAINT_PARSERS[constraint](self) 6016 6017 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6018 return self._parse_id_var(any_token=False) 6019 6020 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6021 self._match_text_seq("KEY") 6022 return self.expression( 6023 exp.UniqueColumnConstraint, 6024 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6025 this=self._parse_schema(self._parse_unique_key()), 6026 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6027 on_conflict=self._parse_on_conflict(), 6028 options=self._parse_key_constraint_options(), 6029 ) 6030 6031 def _parse_key_constraint_options(self) -> t.List[str]: 6032 options = [] 6033 while True: 6034 if not self._curr: 6035 break 6036 6037 if self._match(TokenType.ON): 6038 action = None 6039 on = self._advance_any() and self._prev.text 6040 6041 if self._match_text_seq("NO", "ACTION"): 6042 action = "NO ACTION" 6043 elif self._match_text_seq("CASCADE"): 6044 action = "CASCADE" 6045 elif self._match_text_seq("RESTRICT"): 6046 action = "RESTRICT" 6047 elif self._match_pair(TokenType.SET, TokenType.NULL): 6048 action = "SET NULL" 6049 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6050 action = "SET DEFAULT" 6051 else: 6052 self.raise_error("Invalid key constraint") 6053 6054 options.append(f"ON {on} {action}") 6055 else: 6056 var = self._parse_var_from_options( 6057 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6058 ) 6059 if not var: 6060 break 6061 options.append(var.name) 6062 6063 return options 6064 6065 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6066 if match and not self._match(TokenType.REFERENCES): 6067 return None 6068 6069 expressions = None 6070 this = self._parse_table(schema=True) 6071 options = self._parse_key_constraint_options() 6072 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6073 6074 def _parse_foreign_key(self) -> exp.ForeignKey: 6075 expressions = self._parse_wrapped_id_vars() 6076 reference = self._parse_references() 6077 on_options = {} 6078 6079 while self._match(TokenType.ON): 6080 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6081 self.raise_error("Expected DELETE or UPDATE") 6082 6083 kind = self._prev.text.lower() 6084 6085 if self._match_text_seq("NO", "ACTION"): 6086 action = "NO ACTION" 6087 elif self._match(TokenType.SET): 6088 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 6089 action = "SET " + self._prev.text.upper() 6090 else: 6091 self._advance() 6092 action = self._prev.text.upper() 6093 6094 on_options[kind] = action 6095 6096 return self.expression( 6097 exp.ForeignKey, 6098 expressions=expressions, 6099 reference=reference, 6100 
options=self._parse_key_constraint_options(), 6101 **on_options, # type: ignore 6102 ) 6103 6104 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 6105 return self._parse_ordered() or self._parse_field() 6106 6107 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 6108 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 6109 self._retreat(self._index - 1) 6110 return None 6111 6112 id_vars = self._parse_wrapped_id_vars() 6113 return self.expression( 6114 exp.PeriodForSystemTimeConstraint, 6115 this=seq_get(id_vars, 0), 6116 expression=seq_get(id_vars, 1), 6117 ) 6118 6119 def _parse_primary_key( 6120 self, wrapped_optional: bool = False, in_props: bool = False 6121 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 6122 desc = ( 6123 self._match_set((TokenType.ASC, TokenType.DESC)) 6124 and self._prev.token_type == TokenType.DESC 6125 ) 6126 6127 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 6128 return self.expression( 6129 exp.PrimaryKeyColumnConstraint, 6130 desc=desc, 6131 options=self._parse_key_constraint_options(), 6132 ) 6133 6134 expressions = self._parse_wrapped_csv( 6135 self._parse_primary_key_part, optional=wrapped_optional 6136 ) 6137 options = self._parse_key_constraint_options() 6138 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 6139 6140 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6141 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 6142 6143 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6144 """ 6145 Parses a datetime column in ODBC format. We parse the column into the corresponding 6146 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 6147 same as we did for `DATE('yyyy-mm-dd')`. 
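By the same token, `{t'hh:mm:ss'}` should map to a `Time` and `{ts'yyyy-mm-dd hh:mm:ss'}`
to a `Timestamp`, following the ODBC_DATETIME_LITERALS mapping used below (an
illustrative reading, not an exhaustive list).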
6148 6149 Reference: 6150 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6151 """ 6152 self._match(TokenType.VAR) 6153 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6154 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6155 if not self._match(TokenType.R_BRACE): 6156 self.raise_error("Expected }") 6157 return expression 6158 6159 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6160 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6161 return this 6162 6163 bracket_kind = self._prev.token_type 6164 if ( 6165 bracket_kind == TokenType.L_BRACE 6166 and self._curr 6167 and self._curr.token_type == TokenType.VAR 6168 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6169 ): 6170 return self._parse_odbc_datetime_literal() 6171 6172 expressions = self._parse_csv( 6173 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6174 ) 6175 6176 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6177 self.raise_error("Expected ]") 6178 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6179 self.raise_error("Expected }") 6180 6181 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6182 if bracket_kind == TokenType.L_BRACE: 6183 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 6184 elif not this: 6185 this = build_array_constructor( 6186 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6187 ) 6188 else: 6189 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6190 if constructor_type: 6191 return build_array_constructor( 6192 constructor_type, 6193 args=expressions, 6194 bracket_kind=bracket_kind, 6195 dialect=self.dialect, 6196 ) 6197 6198 expressions = apply_index_offset( 6199 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6200 ) 6201 this = self.expression(exp.Bracket, this=this, expressions=expressions) 6202 6203 self._add_comments(this) 6204 return self._parse_bracket(this) 6205 6206 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6207 if self._match(TokenType.COLON): 6208 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6209 return this 6210 6211 def _parse_case(self) -> t.Optional[exp.Expression]: 6212 ifs = [] 6213 default = None 6214 6215 comments = self._prev_comments 6216 expression = self._parse_assignment() 6217 6218 while self._match(TokenType.WHEN): 6219 this = self._parse_assignment() 6220 self._match(TokenType.THEN) 6221 then = self._parse_assignment() 6222 ifs.append(self.expression(exp.If, this=this, true=then)) 6223 6224 if self._match(TokenType.ELSE): 6225 default = self._parse_assignment() 6226 6227 if not self._match(TokenType.END): 6228 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6229 default = exp.column("interval") 6230 else: 6231 self.raise_error("Expected END after CASE", self._prev) 6232 6233 return self.expression( 6234 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6235 ) 6236 6237 def _parse_if(self) -> t.Optional[exp.Expression]: 6238 if self._match(TokenType.L_PAREN): 6239 args = self._parse_csv( 6240 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6241 ) 6242 this = self.validate_expression(exp.If.from_arg_list(args), args) 6243 self._match_r_paren() 6244 
else: 6245 index = self._index - 1 6246 6247 if self.NO_PAREN_IF_COMMANDS and index == 0: 6248 return self._parse_as_command(self._prev) 6249 6250 condition = self._parse_assignment() 6251 6252 if not condition: 6253 self._retreat(index) 6254 return None 6255 6256 self._match(TokenType.THEN) 6257 true = self._parse_assignment() 6258 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6259 self._match(TokenType.END) 6260 this = self.expression(exp.If, this=condition, true=true, false=false) 6261 6262 return this 6263 6264 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6265 if not self._match_text_seq("VALUE", "FOR"): 6266 self._retreat(self._index - 1) 6267 return None 6268 6269 return self.expression( 6270 exp.NextValueFor, 6271 this=self._parse_column(), 6272 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6273 ) 6274 6275 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6276 this = self._parse_function() or self._parse_var_or_string(upper=True) 6277 6278 if self._match(TokenType.FROM): 6279 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6280 6281 if not self._match(TokenType.COMMA): 6282 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6283 6284 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6285 6286 def _parse_gap_fill(self) -> exp.GapFill: 6287 self._match(TokenType.TABLE) 6288 this = self._parse_table() 6289 6290 self._match(TokenType.COMMA) 6291 args = [this, *self._parse_csv(self._parse_lambda)] 6292 6293 gap_fill = exp.GapFill.from_arg_list(args) 6294 return self.validate_expression(gap_fill, args) 6295 6296 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6297 this = self._parse_assignment() 6298 6299 if not self._match(TokenType.ALIAS): 6300 if self._match(TokenType.COMMA): 6301 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6302 6303 self.raise_error("Expected AS after CAST") 6304 6305 fmt = None 6306 to = self._parse_types() 6307 6308 default = self._match(TokenType.DEFAULT) 6309 if default: 6310 default = self._parse_bitwise() 6311 self._match_text_seq("ON", "CONVERSION", "ERROR") 6312 6313 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6314 fmt_string = self._parse_string() 6315 fmt = self._parse_at_time_zone(fmt_string) 6316 6317 if not to: 6318 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6319 if to.this in exp.DataType.TEMPORAL_TYPES: 6320 this = self.expression( 6321 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6322 this=this, 6323 format=exp.Literal.string( 6324 format_time( 6325 fmt_string.this if fmt_string else "", 6326 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6327 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6328 ) 6329 ), 6330 safe=safe, 6331 ) 6332 6333 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6334 this.set("zone", fmt.args["zone"]) 6335 return this 6336 elif not to: 6337 self.raise_error("Expected TYPE after CAST") 6338 elif isinstance(to, exp.Identifier): 6339 to = exp.DataType.build(to.name, udt=True) 6340 elif to.this == exp.DataType.Type.CHAR: 6341 if self._match(TokenType.CHARACTER_SET): 6342 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6343 6344 return self.expression( 6345 exp.Cast if strict else exp.TryCast, 6346 this=this, 6347 to=to, 6348 format=fmt, 6349 safe=safe, 6350 
action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6351 default=default, 6352 ) 6353 6354 def _parse_string_agg(self) -> exp.GroupConcat: 6355 if self._match(TokenType.DISTINCT): 6356 args: t.List[t.Optional[exp.Expression]] = [ 6357 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6358 ] 6359 if self._match(TokenType.COMMA): 6360 args.extend(self._parse_csv(self._parse_assignment)) 6361 else: 6362 args = self._parse_csv(self._parse_assignment) # type: ignore 6363 6364 if self._match_text_seq("ON", "OVERFLOW"): 6365 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6366 if self._match_text_seq("ERROR"): 6367 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6368 else: 6369 self._match_text_seq("TRUNCATE") 6370 on_overflow = self.expression( 6371 exp.OverflowTruncateBehavior, 6372 this=self._parse_string(), 6373 with_count=( 6374 self._match_text_seq("WITH", "COUNT") 6375 or not self._match_text_seq("WITHOUT", "COUNT") 6376 ), 6377 ) 6378 else: 6379 on_overflow = None 6380 6381 index = self._index 6382 if not self._match(TokenType.R_PAREN) and args: 6383 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6384 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6385 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6386 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6387 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6388 6389 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6390 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6391 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
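# Illustrative sketch of the intended round-trip (assuming the public transpile API):
#
#     import sqlglot
#
#     sqlglot.transpile(
#         "SELECT STRING_AGG(x, ',' ORDER BY y) FROM t", read="postgres", write="mysql"
#     )
#     # expected, roughly: ["SELECT GROUP_CONCAT(x ORDER BY y SEPARATOR ',') FROM t"]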
6392 if not self._match_text_seq("WITHIN", "GROUP"): 6393 self._retreat(index) 6394 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6395 6396 # The corresponding match_r_paren will be called in parse_function (caller) 6397 self._match_l_paren() 6398 6399 return self.expression( 6400 exp.GroupConcat, 6401 this=self._parse_order(this=seq_get(args, 0)), 6402 separator=seq_get(args, 1), 6403 on_overflow=on_overflow, 6404 ) 6405 6406 def _parse_convert( 6407 self, strict: bool, safe: t.Optional[bool] = None 6408 ) -> t.Optional[exp.Expression]: 6409 this = self._parse_bitwise() 6410 6411 if self._match(TokenType.USING): 6412 to: t.Optional[exp.Expression] = self.expression( 6413 exp.CharacterSet, this=self._parse_var() 6414 ) 6415 elif self._match(TokenType.COMMA): 6416 to = self._parse_types() 6417 else: 6418 to = None 6419 6420 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 6421 6422 def _parse_xml_table(self) -> exp.XMLTable: 6423 namespaces = None 6424 passing = None 6425 columns = None 6426 6427 if self._match_text_seq("XMLNAMESPACES", "("): 6428 namespaces = self._parse_xml_namespace() 6429 self._match_text_seq(")", ",") 6430 6431 this = self._parse_string() 6432 6433 if self._match_text_seq("PASSING"): 6434 # The BY VALUE keywords are optional and are provided for semantic clarity 6435 self._match_text_seq("BY", "VALUE") 6436 passing = self._parse_csv(self._parse_column) 6437 6438 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6439 6440 if self._match_text_seq("COLUMNS"): 6441 columns = self._parse_csv(self._parse_field_def) 6442 6443 return self.expression( 6444 exp.XMLTable, 6445 this=this, 6446 namespaces=namespaces, 6447 passing=passing, 6448 columns=columns, 6449 by_ref=by_ref, 6450 ) 6451 6452 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6453 namespaces = [] 6454 6455 while True: 6456 if self._match(TokenType.DEFAULT): 6457 uri = self._parse_string() 6458 else: 6459 uri = self._parse_alias(self._parse_string()) 6460 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6461 if not self._match(TokenType.COMMA): 6462 break 6463 6464 return namespaces 6465 6466 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 6467 """ 6468 There are generally two variants of the DECODE function: 6469 6470 - DECODE(bin, charset) 6471 - DECODE(expression, search, result [, search, result] ... [, default]) 6472 6473 The second variant will always be parsed into a CASE expression. Note that NULL 6474 needs special treatment, since we need to explicitly check for it with `IS NULL`, 6475 instead of relying on pattern matching. 
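For example, `DECODE(x, 1, 'one', 'other')` becomes the equivalent of
`CASE WHEN x = 1 THEN 'one' ELSE 'other' END`, while a non-literal, non-NULL search
value additionally compares both operands with `IS NULL`, as implemented below.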
6476 """ 6477 args = self._parse_csv(self._parse_assignment) 6478 6479 if len(args) < 3: 6480 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6481 6482 expression, *expressions = args 6483 if not expression: 6484 return None 6485 6486 ifs = [] 6487 for search, result in zip(expressions[::2], expressions[1::2]): 6488 if not search or not result: 6489 return None 6490 6491 if isinstance(search, exp.Literal): 6492 ifs.append( 6493 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 6494 ) 6495 elif isinstance(search, exp.Null): 6496 ifs.append( 6497 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 6498 ) 6499 else: 6500 cond = exp.or_( 6501 exp.EQ(this=expression.copy(), expression=search), 6502 exp.and_( 6503 exp.Is(this=expression.copy(), expression=exp.Null()), 6504 exp.Is(this=search.copy(), expression=exp.Null()), 6505 copy=False, 6506 ), 6507 copy=False, 6508 ) 6509 ifs.append(exp.If(this=cond, true=result)) 6510 6511 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6512 6513 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6514 self._match_text_seq("KEY") 6515 key = self._parse_column() 6516 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6517 self._match_text_seq("VALUE") 6518 value = self._parse_bitwise() 6519 6520 if not key and not value: 6521 return None 6522 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6523 6524 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6525 if not this or not self._match_text_seq("FORMAT", "JSON"): 6526 return this 6527 6528 return self.expression(exp.FormatJson, this=this) 6529 6530 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6531 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6532 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6533 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6534 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6535 else: 6536 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6537 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6538 6539 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6540 6541 if not empty and not error and not null: 6542 return None 6543 6544 return self.expression( 6545 exp.OnCondition, 6546 empty=empty, 6547 error=error, 6548 null=null, 6549 ) 6550 6551 def _parse_on_handling( 6552 self, on: str, *values: str 6553 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6554 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6555 for value in values: 6556 if self._match_text_seq(value, "ON", on): 6557 return f"{value} ON {on}" 6558 6559 index = self._index 6560 if self._match(TokenType.DEFAULT): 6561 default_value = self._parse_bitwise() 6562 if self._match_text_seq("ON", on): 6563 return default_value 6564 6565 self._retreat(index) 6566 6567 return None 6568 6569 @t.overload 6570 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6571 6572 @t.overload 6573 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
6574 6575 def _parse_json_object(self, agg=False): 6576 star = self._parse_star() 6577 expressions = ( 6578 [star] 6579 if star 6580 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6581 ) 6582 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6583 6584 unique_keys = None 6585 if self._match_text_seq("WITH", "UNIQUE"): 6586 unique_keys = True 6587 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6588 unique_keys = False 6589 6590 self._match_text_seq("KEYS") 6591 6592 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6593 self._parse_type() 6594 ) 6595 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6596 6597 return self.expression( 6598 exp.JSONObjectAgg if agg else exp.JSONObject, 6599 expressions=expressions, 6600 null_handling=null_handling, 6601 unique_keys=unique_keys, 6602 return_type=return_type, 6603 encoding=encoding, 6604 ) 6605 6606 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6607 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6608 if not self._match_text_seq("NESTED"): 6609 this = self._parse_id_var() 6610 kind = self._parse_types(allow_identifiers=False) 6611 nested = None 6612 else: 6613 this = None 6614 kind = None 6615 nested = True 6616 6617 path = self._match_text_seq("PATH") and self._parse_string() 6618 nested_schema = nested and self._parse_json_schema() 6619 6620 return self.expression( 6621 exp.JSONColumnDef, 6622 this=this, 6623 kind=kind, 6624 path=path, 6625 nested_schema=nested_schema, 6626 ) 6627 6628 def _parse_json_schema(self) -> exp.JSONSchema: 6629 self._match_text_seq("COLUMNS") 6630 return self.expression( 6631 exp.JSONSchema, 6632 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6633 ) 6634 6635 def _parse_json_table(self) -> exp.JSONTable: 6636 this = self._parse_format_json(self._parse_bitwise()) 6637 path = self._match(TokenType.COMMA) and self._parse_string() 6638 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6639 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6640 schema = self._parse_json_schema() 6641 6642 return exp.JSONTable( 6643 this=this, 6644 schema=schema, 6645 path=path, 6646 error_handling=error_handling, 6647 empty_handling=empty_handling, 6648 ) 6649 6650 def _parse_match_against(self) -> exp.MatchAgainst: 6651 expressions = self._parse_csv(self._parse_column) 6652 6653 self._match_text_seq(")", "AGAINST", "(") 6654 6655 this = self._parse_string() 6656 6657 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6658 modifier = "IN NATURAL LANGUAGE MODE" 6659 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6660 modifier = f"{modifier} WITH QUERY EXPANSION" 6661 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6662 modifier = "IN BOOLEAN MODE" 6663 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6664 modifier = "WITH QUERY EXPANSION" 6665 else: 6666 modifier = None 6667 6668 return self.expression( 6669 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6670 ) 6671 6672 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6673 def _parse_open_json(self) -> exp.OpenJSON: 6674 this = self._parse_bitwise() 6675 path = self._match(TokenType.COMMA) and self._parse_string() 6676 6677 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6678 this = self._parse_field(any_token=True) 6679 kind = self._parse_types() 6680 path = 
self._parse_string() 6681 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6682 6683 return self.expression( 6684 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6685 ) 6686 6687 expressions = None 6688 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6689 self._match_l_paren() 6690 expressions = self._parse_csv(_parse_open_json_column_def) 6691 6692 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6693 6694 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6695 args = self._parse_csv(self._parse_bitwise) 6696 6697 if self._match(TokenType.IN): 6698 return self.expression( 6699 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6700 ) 6701 6702 if haystack_first: 6703 haystack = seq_get(args, 0) 6704 needle = seq_get(args, 1) 6705 else: 6706 haystack = seq_get(args, 1) 6707 needle = seq_get(args, 0) 6708 6709 return self.expression( 6710 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6711 ) 6712 6713 def _parse_predict(self) -> exp.Predict: 6714 self._match_text_seq("MODEL") 6715 this = self._parse_table() 6716 6717 self._match(TokenType.COMMA) 6718 self._match_text_seq("TABLE") 6719 6720 return self.expression( 6721 exp.Predict, 6722 this=this, 6723 expression=self._parse_table(), 6724 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6725 ) 6726 6727 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6728 args = self._parse_csv(self._parse_table) 6729 return exp.JoinHint(this=func_name.upper(), expressions=args) 6730 6731 def _parse_substring(self) -> exp.Substring: 6732 # Postgres supports the form: substring(string [from int] [for int]) 6733 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6734 6735 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6736 6737 if self._match(TokenType.FROM): 6738 args.append(self._parse_bitwise()) 6739 if self._match(TokenType.FOR): 6740 if len(args) == 1: 6741 args.append(exp.Literal.number(1)) 6742 args.append(self._parse_bitwise()) 6743 6744 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6745 6746 def _parse_trim(self) -> exp.Trim: 6747 # https://www.w3resource.com/sql/character-functions/trim.php 6748 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6749 6750 position = None 6751 collation = None 6752 expression = None 6753 6754 if self._match_texts(self.TRIM_TYPES): 6755 position = self._prev.text.upper() 6756 6757 this = self._parse_bitwise() 6758 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6759 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6760 expression = self._parse_bitwise() 6761 6762 if invert_order: 6763 this, expression = expression, this 6764 6765 if self._match(TokenType.COLLATE): 6766 collation = self._parse_bitwise() 6767 6768 return self.expression( 6769 exp.Trim, this=this, position=position, expression=expression, collation=collation 6770 ) 6771 6772 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6773 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6774 6775 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6776 return self._parse_window(self._parse_id_var(), alias=True) 6777 6778 def _parse_respect_or_ignore_nulls( 6779 self, this: t.Optional[exp.Expression] 6780 ) -> t.Optional[exp.Expression]: 6781 if self._match_text_seq("IGNORE", "NULLS"): 
6782 return self.expression(exp.IgnoreNulls, this=this) 6783 if self._match_text_seq("RESPECT", "NULLS"): 6784 return self.expression(exp.RespectNulls, this=this) 6785 return this 6786 6787 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6788 if self._match(TokenType.HAVING): 6789 self._match_texts(("MAX", "MIN")) 6790 max = self._prev.text.upper() != "MIN" 6791 return self.expression( 6792 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6793 ) 6794 6795 return this 6796 6797 def _parse_window( 6798 self, this: t.Optional[exp.Expression], alias: bool = False 6799 ) -> t.Optional[exp.Expression]: 6800 func = this 6801 comments = func.comments if isinstance(func, exp.Expression) else None 6802 6803 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6804 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6805 if self._match_text_seq("WITHIN", "GROUP"): 6806 order = self._parse_wrapped(self._parse_order) 6807 this = self.expression(exp.WithinGroup, this=this, expression=order) 6808 6809 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6810 self._match(TokenType.WHERE) 6811 this = self.expression( 6812 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6813 ) 6814 self._match_r_paren() 6815 6816 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6817 # Some dialects choose to implement and some do not. 6818 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6819 6820 # There is some code above in _parse_lambda that handles 6821 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6822 6823 # The below changes handle 6824 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6825 6826 # Oracle allows both formats 6827 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6828 # and Snowflake chose to do the same for familiarity 6829 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6830 if isinstance(this, exp.AggFunc): 6831 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6832 6833 if ignore_respect and ignore_respect is not this: 6834 ignore_respect.replace(ignore_respect.this) 6835 this = self.expression(ignore_respect.__class__, this=this) 6836 6837 this = self._parse_respect_or_ignore_nulls(this) 6838 6839 # bigquery select from window x AS (partition by ...) 
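# Illustrative sketch (assuming the public API): with alias=True this handles entries of
# a WINDOW clause, e.g.
#
#     import sqlglot
#
#     sqlglot.parse_one(
#         "SELECT SUM(x) OVER w FROM t WINDOW w AS (PARTITION BY y)", read="bigquery"
#     )
#
# where `w AS (PARTITION BY y)` is expected to parse into an aliased exp.Window.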
6840 if alias: 6841 over = None 6842 self._match(TokenType.ALIAS) 6843 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6844 return this 6845 else: 6846 over = self._prev.text.upper() 6847 6848 if comments and isinstance(func, exp.Expression): 6849 func.pop_comments() 6850 6851 if not self._match(TokenType.L_PAREN): 6852 return self.expression( 6853 exp.Window, 6854 comments=comments, 6855 this=this, 6856 alias=self._parse_id_var(False), 6857 over=over, 6858 ) 6859 6860 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6861 6862 first = self._match(TokenType.FIRST) 6863 if self._match_text_seq("LAST"): 6864 first = False 6865 6866 partition, order = self._parse_partition_and_order() 6867 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6868 6869 if kind: 6870 self._match(TokenType.BETWEEN) 6871 start = self._parse_window_spec() 6872 self._match(TokenType.AND) 6873 end = self._parse_window_spec() 6874 6875 spec = self.expression( 6876 exp.WindowSpec, 6877 kind=kind, 6878 start=start["value"], 6879 start_side=start["side"], 6880 end=end["value"], 6881 end_side=end["side"], 6882 ) 6883 else: 6884 spec = None 6885 6886 self._match_r_paren() 6887 6888 window = self.expression( 6889 exp.Window, 6890 comments=comments, 6891 this=this, 6892 partition_by=partition, 6893 order=order, 6894 spec=spec, 6895 alias=window_alias, 6896 over=over, 6897 first=first, 6898 ) 6899 6900 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6901 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6902 return self._parse_window(window, alias=alias) 6903 6904 return window 6905 6906 def _parse_partition_and_order( 6907 self, 6908 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6909 return self._parse_partition_by(), self._parse_order() 6910 6911 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6912 self._match(TokenType.BETWEEN) 6913 6914 return { 6915 "value": ( 6916 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6917 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6918 or self._parse_bitwise() 6919 ), 6920 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6921 } 6922 6923 def _parse_alias( 6924 self, this: t.Optional[exp.Expression], explicit: bool = False 6925 ) -> t.Optional[exp.Expression]: 6926 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 6927 # so this section tries to parse the clause version and if it fails, it treats the token 6928 # as an identifier (alias) 6929 if self._can_parse_limit_or_offset(): 6930 return this 6931 6932 any_token = self._match(TokenType.ALIAS) 6933 comments = self._prev_comments or [] 6934 6935 if explicit and not any_token: 6936 return this 6937 6938 if self._match(TokenType.L_PAREN): 6939 aliases = self.expression( 6940 exp.Aliases, 6941 comments=comments, 6942 this=this, 6943 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6944 ) 6945 self._match_r_paren(aliases) 6946 return aliases 6947 6948 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6949 self.STRING_ALIASES and self._parse_string_as_identifier() 6950 ) 6951 6952 if alias: 6953 comments.extend(alias.pop_comments()) 6954 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6955 column = this.this 6956 6957 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6958 if not this.comments and column and 
column.comments: 6959 this.comments = column.pop_comments() 6960 6961 return this 6962 6963 def _parse_id_var( 6964 self, 6965 any_token: bool = True, 6966 tokens: t.Optional[t.Collection[TokenType]] = None, 6967 ) -> t.Optional[exp.Expression]: 6968 expression = self._parse_identifier() 6969 if not expression and ( 6970 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6971 ): 6972 quoted = self._prev.token_type == TokenType.STRING 6973 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6974 6975 return expression 6976 6977 def _parse_string(self) -> t.Optional[exp.Expression]: 6978 if self._match_set(self.STRING_PARSERS): 6979 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6980 return self._parse_placeholder() 6981 6982 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6983 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6984 6985 def _parse_number(self) -> t.Optional[exp.Expression]: 6986 if self._match_set(self.NUMERIC_PARSERS): 6987 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6988 return self._parse_placeholder() 6989 6990 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6991 if self._match(TokenType.IDENTIFIER): 6992 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6993 return self._parse_placeholder() 6994 6995 def _parse_var( 6996 self, 6997 any_token: bool = False, 6998 tokens: t.Optional[t.Collection[TokenType]] = None, 6999 upper: bool = False, 7000 ) -> t.Optional[exp.Expression]: 7001 if ( 7002 (any_token and self._advance_any()) 7003 or self._match(TokenType.VAR) 7004 or (self._match_set(tokens) if tokens else False) 7005 ): 7006 return self.expression( 7007 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7008 ) 7009 return self._parse_placeholder() 7010 7011 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 7012 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7013 self._advance() 7014 return self._prev 7015 return None 7016 7017 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7018 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7019 7020 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7021 return self._parse_primary() or self._parse_var(any_token=True) 7022 7023 def _parse_null(self) -> t.Optional[exp.Expression]: 7024 if self._match_set(self.NULL_TOKENS): 7025 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7026 return self._parse_placeholder() 7027 7028 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7029 if self._match(TokenType.TRUE): 7030 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7031 if self._match(TokenType.FALSE): 7032 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7033 return self._parse_placeholder() 7034 7035 def _parse_star(self) -> t.Optional[exp.Expression]: 7036 if self._match(TokenType.STAR): 7037 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7038 return self._parse_placeholder() 7039 7040 def _parse_parameter(self) -> exp.Parameter: 7041 this = self._parse_identifier() or self._parse_primary_or_var() 7042 return self.expression(exp.Parameter, this=this) 7043 7044 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7045 if self._match_set(self.PLACEHOLDER_PARSERS): 7046 placeholder = 
self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7047 if placeholder: 7048 return placeholder 7049 self._advance(-1) 7050 return None 7051 7052 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7053 if not self._match_texts(keywords): 7054 return None 7055 if self._match(TokenType.L_PAREN, advance=False): 7056 return self._parse_wrapped_csv(self._parse_expression) 7057 7058 expression = self._parse_expression() 7059 return [expression] if expression else None 7060 7061 def _parse_csv( 7062 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7063 ) -> t.List[exp.Expression]: 7064 parse_result = parse_method() 7065 items = [parse_result] if parse_result is not None else [] 7066 7067 while self._match(sep): 7068 self._add_comments(parse_result) 7069 parse_result = parse_method() 7070 if parse_result is not None: 7071 items.append(parse_result) 7072 7073 return items 7074 7075 def _parse_tokens( 7076 self, parse_method: t.Callable, expressions: t.Dict 7077 ) -> t.Optional[exp.Expression]: 7078 this = parse_method() 7079 7080 while self._match_set(expressions): 7081 this = self.expression( 7082 expressions[self._prev.token_type], 7083 this=this, 7084 comments=self._prev_comments, 7085 expression=parse_method(), 7086 ) 7087 7088 return this 7089 7090 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7091 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7092 7093 def _parse_wrapped_csv( 7094 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 7095 ) -> t.List[exp.Expression]: 7096 return self._parse_wrapped( 7097 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7098 ) 7099 7100 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7101 wrapped = self._match(TokenType.L_PAREN) 7102 if not wrapped and not optional: 7103 self.raise_error("Expecting (") 7104 parse_result = parse_method() 7105 if wrapped: 7106 self._match_r_paren() 7107 return parse_result 7108 7109 def _parse_expressions(self) -> t.List[exp.Expression]: 7110 return self._parse_csv(self._parse_expression) 7111 7112 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7113 return self._parse_select() or self._parse_set_operations( 7114 self._parse_alias(self._parse_assignment(), explicit=True) 7115 if alias 7116 else self._parse_assignment() 7117 ) 7118 7119 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7120 return self._parse_query_modifiers( 7121 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7122 ) 7123 7124 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7125 this = None 7126 if self._match_texts(self.TRANSACTION_KIND): 7127 this = self._prev.text 7128 7129 self._match_texts(("TRANSACTION", "WORK")) 7130 7131 modes = [] 7132 while True: 7133 mode = [] 7134 while self._match(TokenType.VAR): 7135 mode.append(self._prev.text) 7136 7137 if mode: 7138 modes.append(" ".join(mode)) 7139 if not self._match(TokenType.COMMA): 7140 break 7141 7142 return self.expression(exp.Transaction, this=this, modes=modes) 7143 7144 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7145 chain = None 7146 savepoint = None 7147 is_rollback = self._prev.token_type == TokenType.ROLLBACK 7148 7149 self._match_texts(("TRANSACTION", "WORK")) 7150 7151 if self._match_text_seq("TO"): 7152 self._match_text_seq("SAVEPOINT") 7153 savepoint = self._parse_id_var() 
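# Illustrative aside (assuming the public API): both statement shapes route through this
# method, e.g.
#
#     import sqlglot
#
#     sqlglot.parse_one("ROLLBACK TO SAVEPOINT sp1")  # expected: exp.Rollback(savepoint=sp1)
#     sqlglot.parse_one("COMMIT AND NO CHAIN")        # expected: exp.Commit(chain=False)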
7154 7155 if self._match(TokenType.AND): 7156 chain = not self._match_text_seq("NO") 7157 self._match_text_seq("CHAIN") 7158 7159 if is_rollback: 7160 return self.expression(exp.Rollback, savepoint=savepoint) 7161 7162 return self.expression(exp.Commit, chain=chain) 7163 7164 def _parse_refresh(self) -> exp.Refresh: 7165 self._match(TokenType.TABLE) 7166 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7167 7168 def _parse_add_column(self) -> t.Optional[exp.Expression]: 7169 if not self._match_text_seq("ADD"): 7170 return None 7171 7172 self._match(TokenType.COLUMN) 7173 exists_column = self._parse_exists(not_=True) 7174 expression = self._parse_field_def() 7175 7176 if expression: 7177 expression.set("exists", exists_column) 7178 7179 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7180 if self._match_texts(("FIRST", "AFTER")): 7181 position = self._prev.text 7182 column_position = self.expression( 7183 exp.ColumnPosition, this=self._parse_column(), position=position 7184 ) 7185 expression.set("position", column_position) 7186 7187 return expression 7188 7189 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7190 drop = self._match(TokenType.DROP) and self._parse_drop() 7191 if drop and not isinstance(drop, exp.Command): 7192 drop.set("kind", drop.args.get("kind", "COLUMN")) 7193 return drop 7194 7195 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7196 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7197 return self.expression( 7198 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7199 ) 7200 7201 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7202 index = self._index - 1 7203 7204 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7205 return self._parse_csv( 7206 lambda: self.expression( 7207 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7208 ) 7209 ) 7210 7211 self._retreat(index) 7212 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 7213 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 7214 7215 if self._match_text_seq("ADD", "COLUMNS"): 7216 schema = self._parse_schema() 7217 if schema: 7218 return [schema] 7219 return [] 7220 7221 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 7222 7223 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7224 if self._match_texts(self.ALTER_ALTER_PARSERS): 7225 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7226 7227 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7228 # keyword after ALTER we default to parsing this statement 7229 self._match(TokenType.COLUMN) 7230 column = self._parse_field(any_token=True) 7231 7232 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7233 return self.expression(exp.AlterColumn, this=column, drop=True) 7234 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7235 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7236 if self._match(TokenType.COMMENT): 7237 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7238 if self._match_text_seq("DROP", "NOT", "NULL"): 7239 return self.expression( 7240 exp.AlterColumn, 7241 this=column, 7242 drop=True, 7243 allow_null=True, 7244 ) 7245 if self._match_text_seq("SET", "NOT", "NULL"): 7246 return self.expression( 7247 
exp.AlterColumn, 7248 this=column, 7249 allow_null=False, 7250 ) 7251 7252 if self._match_text_seq("SET", "VISIBLE"): 7253 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7254 if self._match_text_seq("SET", "INVISIBLE"): 7255 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7256 7257 self._match_text_seq("SET", "DATA") 7258 self._match_text_seq("TYPE") 7259 return self.expression( 7260 exp.AlterColumn, 7261 this=column, 7262 dtype=self._parse_types(), 7263 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7264 using=self._match(TokenType.USING) and self._parse_assignment(), 7265 ) 7266 7267 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7268 if self._match_texts(("ALL", "EVEN", "AUTO")): 7269 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7270 7271 self._match_text_seq("KEY", "DISTKEY") 7272 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7273 7274 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7275 if compound: 7276 self._match_text_seq("SORTKEY") 7277 7278 if self._match(TokenType.L_PAREN, advance=False): 7279 return self.expression( 7280 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7281 ) 7282 7283 self._match_texts(("AUTO", "NONE")) 7284 return self.expression( 7285 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7286 ) 7287 7288 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7289 index = self._index - 1 7290 7291 partition_exists = self._parse_exists() 7292 if self._match(TokenType.PARTITION, advance=False): 7293 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7294 7295 self._retreat(index) 7296 return self._parse_csv(self._parse_drop_column) 7297 7298 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7299 if self._match(TokenType.COLUMN): 7300 exists = self._parse_exists() 7301 old_column = self._parse_column() 7302 to = self._match_text_seq("TO") 7303 new_column = self._parse_column() 7304 7305 if old_column is None or to is None or new_column is None: 7306 return None 7307 7308 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7309 7310 self._match_text_seq("TO") 7311 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7312 7313 def _parse_alter_table_set(self) -> exp.AlterSet: 7314 alter_set = self.expression(exp.AlterSet) 7315 7316 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7317 "TABLE", "PROPERTIES" 7318 ): 7319 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7320 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7321 alter_set.set("expressions", [self._parse_assignment()]) 7322 elif self._match_texts(("LOGGED", "UNLOGGED")): 7323 alter_set.set("option", exp.var(self._prev.text.upper())) 7324 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7325 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7326 elif self._match_text_seq("LOCATION"): 7327 alter_set.set("location", self._parse_field()) 7328 elif self._match_text_seq("ACCESS", "METHOD"): 7329 alter_set.set("access_method", self._parse_field()) 7330 elif self._match_text_seq("TABLESPACE"): 7331 alter_set.set("tablespace", self._parse_field()) 7332 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7333 
alter_set.set("file_format", [self._parse_field()]) 7334 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7335 alter_set.set("file_format", self._parse_wrapped_options()) 7336 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7337 alter_set.set("copy_options", self._parse_wrapped_options()) 7338 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7339 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7340 else: 7341 if self._match_text_seq("SERDE"): 7342 alter_set.set("serde", self._parse_field()) 7343 7344 alter_set.set("expressions", [self._parse_properties()]) 7345 7346 return alter_set 7347 7348 def _parse_alter(self) -> exp.Alter | exp.Command: 7349 start = self._prev 7350 7351 alter_token = self._match_set(self.ALTERABLES) and self._prev 7352 if not alter_token: 7353 return self._parse_as_command(start) 7354 7355 exists = self._parse_exists() 7356 only = self._match_text_seq("ONLY") 7357 this = self._parse_table(schema=True) 7358 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7359 7360 if self._next: 7361 self._advance() 7362 7363 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7364 if parser: 7365 actions = ensure_list(parser(self)) 7366 not_valid = self._match_text_seq("NOT", "VALID") 7367 options = self._parse_csv(self._parse_property) 7368 7369 if not self._curr and actions: 7370 return self.expression( 7371 exp.Alter, 7372 this=this, 7373 kind=alter_token.text.upper(), 7374 exists=exists, 7375 actions=actions, 7376 only=only, 7377 options=options, 7378 cluster=cluster, 7379 not_valid=not_valid, 7380 ) 7381 7382 return self._parse_as_command(start) 7383 7384 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7385 start = self._prev 7386 # https://duckdb.org/docs/sql/statements/analyze 7387 if not self._curr: 7388 return self.expression(exp.Analyze) 7389 7390 options = [] 7391 while self._match_texts(self.ANALYZE_STYLES): 7392 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7393 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7394 else: 7395 options.append(self._prev.text.upper()) 7396 7397 this: t.Optional[exp.Expression] = None 7398 inner_expression: t.Optional[exp.Expression] = None 7399 7400 kind = self._curr and self._curr.text.upper() 7401 7402 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7403 this = self._parse_table_parts() 7404 elif self._match_text_seq("TABLES"): 7405 if self._match_set((TokenType.FROM, TokenType.IN)): 7406 kind = f"{kind} {self._prev.text.upper()}" 7407 this = self._parse_table(schema=True, is_db_reference=True) 7408 elif self._match_text_seq("DATABASE"): 7409 this = self._parse_table(schema=True, is_db_reference=True) 7410 elif self._match_text_seq("CLUSTER"): 7411 this = self._parse_table() 7412 # Try matching inner expr keywords before fallback to parse table. 
7413 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7414 kind = None 7415 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7416 else: 7417 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7418 kind = None 7419 this = self._parse_table_parts() 7420 7421 partition = self._try_parse(self._parse_partition) 7422 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7423 return self._parse_as_command(start) 7424 7425 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7426 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7427 "WITH", "ASYNC", "MODE" 7428 ): 7429 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 7430 else: 7431 mode = None 7432 7433 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7434 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7435 7436 properties = self._parse_properties() 7437 return self.expression( 7438 exp.Analyze, 7439 kind=kind, 7440 this=this, 7441 mode=mode, 7442 partition=partition, 7443 properties=properties, 7444 expression=inner_expression, 7445 options=options, 7446 ) 7447 7448 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7449 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7450 this = None 7451 kind = self._prev.text.upper() 7452 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7453 expressions = [] 7454 7455 if not self._match_text_seq("STATISTICS"): 7456 self.raise_error("Expecting token STATISTICS") 7457 7458 if self._match_text_seq("NOSCAN"): 7459 this = "NOSCAN" 7460 elif self._match(TokenType.FOR): 7461 if self._match_text_seq("ALL", "COLUMNS"): 7462 this = "FOR ALL COLUMNS" 7463 if self._match_texts("COLUMNS"): 7464 this = "FOR COLUMNS" 7465 expressions = self._parse_csv(self._parse_column_reference) 7466 elif self._match_text_seq("SAMPLE"): 7467 sample = self._parse_number() 7468 expressions = [ 7469 self.expression( 7470 exp.AnalyzeSample, 7471 sample=sample, 7472 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7473 ) 7474 ] 7475 7476 return self.expression( 7477 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7478 ) 7479 7480 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7481 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7482 kind = None 7483 this = None 7484 expression: t.Optional[exp.Expression] = None 7485 if self._match_text_seq("REF", "UPDATE"): 7486 kind = "REF" 7487 this = "UPDATE" 7488 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7489 this = "UPDATE SET DANGLING TO NULL" 7490 elif self._match_text_seq("STRUCTURE"): 7491 kind = "STRUCTURE" 7492 if self._match_text_seq("CASCADE", "FAST"): 7493 this = "CASCADE FAST" 7494 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7495 ("ONLINE", "OFFLINE") 7496 ): 7497 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7498 expression = self._parse_into() 7499 7500 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7501 7502 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7503 this = self._prev.text.upper() 7504 if self._match_text_seq("COLUMNS"): 7505 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7506 return None 7507 7508 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7509 kind = 
self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7510 if self._match_text_seq("STATISTICS"): 7511 return self.expression(exp.AnalyzeDelete, kind=kind) 7512 return None 7513 7514 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7515 if self._match_text_seq("CHAINED", "ROWS"): 7516 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7517 return None 7518 7519 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7520 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7521 this = self._prev.text.upper() 7522 expression: t.Optional[exp.Expression] = None 7523 expressions = [] 7524 update_options = None 7525 7526 if self._match_text_seq("HISTOGRAM", "ON"): 7527 expressions = self._parse_csv(self._parse_column_reference) 7528 with_expressions = [] 7529 while self._match(TokenType.WITH): 7530 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7531 if self._match_texts(("SYNC", "ASYNC")): 7532 if self._match_text_seq("MODE", advance=False): 7533 with_expressions.append(f"{self._prev.text.upper()} MODE") 7534 self._advance() 7535 else: 7536 buckets = self._parse_number() 7537 if self._match_text_seq("BUCKETS"): 7538 with_expressions.append(f"{buckets} BUCKETS") 7539 if with_expressions: 7540 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7541 7542 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7543 TokenType.UPDATE, advance=False 7544 ): 7545 update_options = self._prev.text.upper() 7546 self._advance() 7547 elif self._match_text_seq("USING", "DATA"): 7548 expression = self.expression(exp.UsingData, this=self._parse_string()) 7549 7550 return self.expression( 7551 exp.AnalyzeHistogram, 7552 this=this, 7553 expressions=expressions, 7554 expression=expression, 7555 update_options=update_options, 7556 ) 7557 7558 def _parse_merge(self) -> exp.Merge: 7559 self._match(TokenType.INTO) 7560 target = self._parse_table() 7561 7562 if target and self._match(TokenType.ALIAS, advance=False): 7563 target.set("alias", self._parse_table_alias()) 7564 7565 self._match(TokenType.USING) 7566 using = self._parse_table() 7567 7568 self._match(TokenType.ON) 7569 on = self._parse_assignment() 7570 7571 return self.expression( 7572 exp.Merge, 7573 this=target, 7574 using=using, 7575 on=on, 7576 whens=self._parse_when_matched(), 7577 returning=self._parse_returning(), 7578 ) 7579 7580 def _parse_when_matched(self) -> exp.Whens: 7581 whens = [] 7582 7583 while self._match(TokenType.WHEN): 7584 matched = not self._match(TokenType.NOT) 7585 self._match_text_seq("MATCHED") 7586 source = ( 7587 False 7588 if self._match_text_seq("BY", "TARGET") 7589 else self._match_text_seq("BY", "SOURCE") 7590 ) 7591 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7592 7593 self._match(TokenType.THEN) 7594 7595 if self._match(TokenType.INSERT): 7596 this = self._parse_star() 7597 if this: 7598 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7599 else: 7600 then = self.expression( 7601 exp.Insert, 7602 this=exp.var("ROW") 7603 if self._match_text_seq("ROW") 7604 else self._parse_value(values=False), 7605 expression=self._match_text_seq("VALUES") and self._parse_value(), 7606 ) 7607 elif self._match(TokenType.UPDATE): 7608 expressions = self._parse_star() 7609 if expressions: 7610 then = self.expression(exp.Update, expressions=expressions) 7611 else: 7612 then = self.expression( 7613 exp.Update, 7614 
expressions=self._match(TokenType.SET) 7615 and self._parse_csv(self._parse_equality), 7616 ) 7617 elif self._match(TokenType.DELETE): 7618 then = self.expression(exp.Var, this=self._prev.text) 7619 else: 7620 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7621 7622 whens.append( 7623 self.expression( 7624 exp.When, 7625 matched=matched, 7626 source=source, 7627 condition=condition, 7628 then=then, 7629 ) 7630 ) 7631 return self.expression(exp.Whens, expressions=whens) 7632 7633 def _parse_show(self) -> t.Optional[exp.Expression]: 7634 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7635 if parser: 7636 return parser(self) 7637 return self._parse_as_command(self._prev) 7638 7639 def _parse_set_item_assignment( 7640 self, kind: t.Optional[str] = None 7641 ) -> t.Optional[exp.Expression]: 7642 index = self._index 7643 7644 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7645 return self._parse_set_transaction(global_=kind == "GLOBAL") 7646 7647 left = self._parse_primary() or self._parse_column() 7648 assignment_delimiter = self._match_texts(("=", "TO")) 7649 7650 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7651 self._retreat(index) 7652 return None 7653 7654 right = self._parse_statement() or self._parse_id_var() 7655 if isinstance(right, (exp.Column, exp.Identifier)): 7656 right = exp.var(right.name) 7657 7658 this = self.expression(exp.EQ, this=left, expression=right) 7659 return self.expression(exp.SetItem, this=this, kind=kind) 7660 7661 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7662 self._match_text_seq("TRANSACTION") 7663 characteristics = self._parse_csv( 7664 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7665 ) 7666 return self.expression( 7667 exp.SetItem, 7668 expressions=characteristics, 7669 kind="TRANSACTION", 7670 **{"global": global_}, # type: ignore 7671 ) 7672 7673 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7674 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7675 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7676 7677 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7678 index = self._index 7679 set_ = self.expression( 7680 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7681 ) 7682 7683 if self._curr: 7684 self._retreat(index) 7685 return self._parse_as_command(self._prev) 7686 7687 return set_ 7688 7689 def _parse_var_from_options( 7690 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7691 ) -> t.Optional[exp.Var]: 7692 start = self._curr 7693 if not start: 7694 return None 7695 7696 option = start.text.upper() 7697 continuations = options.get(option) 7698 7699 index = self._index 7700 self._advance() 7701 for keywords in continuations or []: 7702 if isinstance(keywords, str): 7703 keywords = (keywords,) 7704 7705 if self._match_text_seq(*keywords): 7706 option = f"{option} {' '.join(keywords)}" 7707 break 7708 else: 7709 if continuations or continuations is None: 7710 if raise_unmatched: 7711 self.raise_error(f"Unknown option {option}") 7712 7713 self._retreat(index) 7714 return None 7715 7716 return exp.var(option) 7717 7718 def _parse_as_command(self, start: Token) -> exp.Command: 7719 while self._curr: 7720 self._advance() 7721 text = self._find_sql(start, self._prev) 7722 size = len(start.text) 7723 self._warn_unsupported() 7724 return exp.Command(this=text[:size], 
expression=text[size:]) 7725 7726 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7727 settings = [] 7728 7729 self._match_l_paren() 7730 kind = self._parse_id_var() 7731 7732 if self._match(TokenType.L_PAREN): 7733 while True: 7734 key = self._parse_id_var() 7735 value = self._parse_primary() 7736 if not key and value is None: 7737 break 7738 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7739 self._match(TokenType.R_PAREN) 7740 7741 self._match_r_paren() 7742 7743 return self.expression( 7744 exp.DictProperty, 7745 this=this, 7746 kind=kind.this if kind else None, 7747 settings=settings, 7748 ) 7749 7750 def _parse_dict_range(self, this: str) -> exp.DictRange: 7751 self._match_l_paren() 7752 has_min = self._match_text_seq("MIN") 7753 if has_min: 7754 min = self._parse_var() or self._parse_primary() 7755 self._match_text_seq("MAX") 7756 max = self._parse_var() or self._parse_primary() 7757 else: 7758 max = self._parse_var() or self._parse_primary() 7759 min = exp.Literal.number(0) 7760 self._match_r_paren() 7761 return self.expression(exp.DictRange, this=this, min=min, max=max) 7762 7763 def _parse_comprehension( 7764 self, this: t.Optional[exp.Expression] 7765 ) -> t.Optional[exp.Comprehension]: 7766 index = self._index 7767 expression = self._parse_column() 7768 if not self._match(TokenType.IN): 7769 self._retreat(index - 1) 7770 return None 7771 iterator = self._parse_column() 7772 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7773 return self.expression( 7774 exp.Comprehension, 7775 this=this, 7776 expression=expression, 7777 iterator=iterator, 7778 condition=condition, 7779 ) 7780 7781 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7782 if self._match(TokenType.HEREDOC_STRING): 7783 return self.expression(exp.Heredoc, this=self._prev.text) 7784 7785 if not self._match_text_seq("$"): 7786 return None 7787 7788 tags = ["$"] 7789 tag_text = None 7790 7791 if self._is_connected(): 7792 self._advance() 7793 tags.append(self._prev.text.upper()) 7794 else: 7795 self.raise_error("No closing $ found") 7796 7797 if tags[-1] != "$": 7798 if self._is_connected() and self._match_text_seq("$"): 7799 tag_text = tags[-1] 7800 tags.append("$") 7801 else: 7802 self.raise_error("No closing $ found") 7803 7804 heredoc_start = self._curr 7805 7806 while self._curr: 7807 if self._match_text_seq(*tags, advance=False): 7808 this = self._find_sql(heredoc_start, self._prev) 7809 self._advance(len(tags)) 7810 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7811 7812 self._advance() 7813 7814 self.raise_error(f"No closing {''.join(tags)} found") 7815 return None 7816 7817 def _find_parser( 7818 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7819 ) -> t.Optional[t.Callable]: 7820 if not self._curr: 7821 return None 7822 7823 index = self._index 7824 this = [] 7825 while True: 7826 # The current token might be multiple words 7827 curr = self._curr.text.upper() 7828 key = curr.split(" ") 7829 this.append(curr) 7830 7831 self._advance() 7832 result, trie = in_trie(trie, key) 7833 if result == TrieResult.FAILED: 7834 break 7835 7836 if result == TrieResult.EXISTS: 7837 subparser = parsers[" ".join(this)] 7838 return subparser 7839 7840 self._retreat(index) 7841 return None 7842 7843 def _match(self, token_type, advance=True, expression=None): 7844 if not self._curr: 7845 return None 7846 7847 if self._curr.token_type == token_type: 7848 if advance: 7849 self._advance() 7850 self._add_comments(expression) 7851 return 
True 7852 7853 return None 7854 7855 def _match_set(self, types, advance=True): 7856 if not self._curr: 7857 return None 7858 7859 if self._curr.token_type in types: 7860 if advance: 7861 self._advance() 7862 return True 7863 7864 return None 7865 7866 def _match_pair(self, token_type_a, token_type_b, advance=True): 7867 if not self._curr or not self._next: 7868 return None 7869 7870 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7871 if advance: 7872 self._advance(2) 7873 return True 7874 7875 return None 7876 7877 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7878 if not self._match(TokenType.L_PAREN, expression=expression): 7879 self.raise_error("Expecting (") 7880 7881 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7882 if not self._match(TokenType.R_PAREN, expression=expression): 7883 self.raise_error("Expecting )") 7884 7885 def _match_texts(self, texts, advance=True): 7886 if ( 7887 self._curr 7888 and self._curr.token_type != TokenType.STRING 7889 and self._curr.text.upper() in texts 7890 ): 7891 if advance: 7892 self._advance() 7893 return True 7894 return None 7895 7896 def _match_text_seq(self, *texts, advance=True): 7897 index = self._index 7898 for text in texts: 7899 if ( 7900 self._curr 7901 and self._curr.token_type != TokenType.STRING 7902 and self._curr.text.upper() == text 7903 ): 7904 self._advance() 7905 else: 7906 self._retreat(index) 7907 return None 7908 7909 if not advance: 7910 self._retreat(index) 7911 7912 return True 7913 7914 def _replace_lambda( 7915 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7916 ) -> t.Optional[exp.Expression]: 7917 if not node: 7918 return node 7919 7920 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7921 7922 for column in node.find_all(exp.Column): 7923 typ = lambda_types.get(column.parts[0].name) 7924 if typ is not None: 7925 dot_or_id = column.to_dot() if column.table else column.this 7926 7927 if typ: 7928 dot_or_id = self.expression( 7929 exp.Cast, 7930 this=dot_or_id, 7931 to=typ, 7932 ) 7933 7934 parent = column.parent 7935 7936 while isinstance(parent, exp.Dot): 7937 if not isinstance(parent.parent, exp.Dot): 7938 parent.replace(dot_or_id) 7939 break 7940 parent = parent.parent 7941 else: 7942 if column is node: 7943 node = dot_or_id 7944 else: 7945 column.replace(dot_or_id) 7946 return node 7947 7948 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7949 start = self._prev 7950 7951 # Not to be confused with TRUNCATE(number, decimals) function call 7952 if self._match(TokenType.L_PAREN): 7953 self._retreat(self._index - 2) 7954 return self._parse_function() 7955 7956 # Clickhouse supports TRUNCATE DATABASE as well 7957 is_database = self._match(TokenType.DATABASE) 7958 7959 self._match(TokenType.TABLE) 7960 7961 exists = self._parse_exists(not_=False) 7962 7963 expressions = self._parse_csv( 7964 lambda: self._parse_table(schema=True, is_db_reference=is_database) 7965 ) 7966 7967 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7968 7969 if self._match_text_seq("RESTART", "IDENTITY"): 7970 identity = "RESTART" 7971 elif self._match_text_seq("CONTINUE", "IDENTITY"): 7972 identity = "CONTINUE" 7973 else: 7974 identity = None 7975 7976 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7977 option = self._prev.text 7978 else: 7979 option = None 7980 7981 partition = self._parse_partition() 
7982 7983 # Fallback case 7984 if self._curr: 7985 return self._parse_as_command(start) 7986 7987 return self.expression( 7988 exp.TruncateTable, 7989 expressions=expressions, 7990 is_database=is_database, 7991 exists=exists, 7992 cluster=cluster, 7993 identity=identity, 7994 option=option, 7995 partition=partition, 7996 ) 7997 7998 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7999 this = self._parse_ordered(self._parse_opclass) 8000 8001 if not self._match(TokenType.WITH): 8002 return this 8003 8004 op = self._parse_var(any_token=True) 8005 8006 return self.expression(exp.WithOperator, this=this, op=op) 8007 8008 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8009 self._match(TokenType.EQ) 8010 self._match(TokenType.L_PAREN) 8011 8012 opts: t.List[t.Optional[exp.Expression]] = [] 8013 option: exp.Expression | None 8014 while self._curr and not self._match(TokenType.R_PAREN): 8015 if self._match_text_seq("FORMAT_NAME", "="): 8016 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8017 option = self._parse_format_name() 8018 else: 8019 option = self._parse_property() 8020 8021 if option is None: 8022 self.raise_error("Unable to parse option") 8023 break 8024 8025 opts.append(option) 8026 8027 return opts 8028 8029 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8030 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8031 8032 options = [] 8033 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8034 option = self._parse_var(any_token=True) 8035 prev = self._prev.text.upper() 8036 8037 # Different dialects might separate options and values by white space, "=" and "AS" 8038 self._match(TokenType.EQ) 8039 self._match(TokenType.ALIAS) 8040 8041 param = self.expression(exp.CopyParameter, this=option) 8042 8043 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8044 TokenType.L_PAREN, advance=False 8045 ): 8046 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8047 param.set("expressions", self._parse_wrapped_options()) 8048 elif prev == "FILE_FORMAT": 8049 # T-SQL's external file format case 8050 param.set("expression", self._parse_field()) 8051 else: 8052 param.set("expression", self._parse_unquoted_field()) 8053 8054 options.append(param) 8055 self._match(sep) 8056 8057 return options 8058 8059 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8060 expr = self.expression(exp.Credentials) 8061 8062 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8063 expr.set("storage", self._parse_field()) 8064 if self._match_text_seq("CREDENTIALS"): 8065 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8066 creds = ( 8067 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8068 ) 8069 expr.set("credentials", creds) 8070 if self._match_text_seq("ENCRYPTION"): 8071 expr.set("encryption", self._parse_wrapped_options()) 8072 if self._match_text_seq("IAM_ROLE"): 8073 expr.set("iam_role", self._parse_field()) 8074 if self._match_text_seq("REGION"): 8075 expr.set("region", self._parse_field()) 8076 8077 return expr 8078 8079 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8080 return self._parse_field() 8081 8082 def _parse_copy(self) -> exp.Copy | exp.Command: 8083 start = self._prev 8084 8085 self._match(TokenType.INTO) 8086 8087 this = ( 8088 self._parse_select(nested=True, parse_subquery_alias=False) 8089 if self._match(TokenType.L_PAREN, advance=False) 8090 else self._parse_table(schema=True) 
8091 ) 8092 8093 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8094 8095 files = self._parse_csv(self._parse_file_location) 8096 credentials = self._parse_credentials() 8097 8098 self._match_text_seq("WITH") 8099 8100 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8101 8102 # Fallback case 8103 if self._curr: 8104 return self._parse_as_command(start) 8105 8106 return self.expression( 8107 exp.Copy, 8108 this=this, 8109 kind=kind, 8110 credentials=credentials, 8111 files=files, 8112 params=params, 8113 ) 8114 8115 def _parse_normalize(self) -> exp.Normalize: 8116 return self.expression( 8117 exp.Normalize, 8118 this=self._parse_bitwise(), 8119 form=self._match(TokenType.COMMA) and self._parse_var(), 8120 ) 8121 8122 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8123 args = self._parse_csv(lambda: self._parse_lambda()) 8124 8125 this = seq_get(args, 0) 8126 decimals = seq_get(args, 1) 8127 8128 return expr_type( 8129 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8130 ) 8131 8132 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8133 if self._match_text_seq("COLUMNS", "(", advance=False): 8134 this = self._parse_function() 8135 if isinstance(this, exp.Columns): 8136 this.set("unpack", True) 8137 return this 8138 8139 return self.expression( 8140 exp.Star, 8141 **{ # type: ignore 8142 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8143 "replace": self._parse_star_op("REPLACE"), 8144 "rename": self._parse_star_op("RENAME"), 8145 }, 8146 ) 8147 8148 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8149 privilege_parts = [] 8150 8151 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8152 # (end of privilege list) or L_PAREN (start of column list) are met 8153 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8154 privilege_parts.append(self._curr.text.upper()) 8155 self._advance() 8156 8157 this = exp.var(" ".join(privilege_parts)) 8158 expressions = ( 8159 self._parse_wrapped_csv(self._parse_column) 8160 if self._match(TokenType.L_PAREN, advance=False) 8161 else None 8162 ) 8163 8164 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8165 8166 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8167 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8168 principal = self._parse_id_var() 8169 8170 if not principal: 8171 return None 8172 8173 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8174 8175 def _parse_grant(self) -> exp.Grant | exp.Command: 8176 start = self._prev 8177 8178 privileges = self._parse_csv(self._parse_grant_privilege) 8179 8180 self._match(TokenType.ON) 8181 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8182 8183 # Attempt to parse the securable e.g. 
MySQL allows names 8184 # such as "foo.*", "*.*" which are not easily parseable yet 8185 securable = self._try_parse(self._parse_table_parts) 8186 8187 if not securable or not self._match_text_seq("TO"): 8188 return self._parse_as_command(start) 8189 8190 principals = self._parse_csv(self._parse_grant_principal) 8191 8192 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8193 8194 if self._curr: 8195 return self._parse_as_command(start) 8196 8197 return self.expression( 8198 exp.Grant, 8199 privileges=privileges, 8200 kind=kind, 8201 securable=securable, 8202 principals=principals, 8203 grant_option=grant_option, 8204 ) 8205 8206 def _parse_overlay(self) -> exp.Overlay: 8207 return self.expression( 8208 exp.Overlay, 8209 **{ # type: ignore 8210 "this": self._parse_bitwise(), 8211 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8212 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8213 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8214 }, 8215 ) 8216 8217 def _parse_format_name(self) -> exp.Property: 8218 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8219 # for FILE_FORMAT = <format_name> 8220 return self.expression( 8221 exp.Property, 8222 this=exp.var("FORMAT_NAME"), 8223 value=self._parse_string() or self._parse_table_parts(), 8224 )
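The internal _parse_* helpers above are not called directly; they are reached through the public entry points. As an illustrative sketch (the statement text below is made up), a MERGE flows through _parse_merge and _parse_when_matched and round-trips through the generator:

    import sqlglot

    # Made-up MERGE statement; handled internally by _parse_merge/_parse_when_matched.
    sql = """
    MERGE INTO target AS t USING source AS s ON t.id = s.id
    WHEN MATCHED THEN UPDATE SET t.v = s.v
    WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)
    """
    tree = sqlglot.parse_one(sql)
    print(type(tree).__name__)    # Merge
    print(tree.sql(pretty=True))  # regenerated SQL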
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
def __init__(
    self,
    error_level: t.Optional[ErrorLevel] = None,
    error_message_context: int = 100,
    max_errors: int = 3,
    dialect: DialectType = None,
):
    from sqlglot.dialects import Dialect

    self.error_level = error_level or ErrorLevel.IMMEDIATE
    self.error_message_context = error_message_context
    self.max_errors = max_errors
    self.dialect = Dialect.get_or_raise(dialect)
    self.reset()
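A minimal construction sketch: dialect accepts a dialect name, class, or instance and is resolved through Dialect.get_or_raise ("duckdb" below is just an example name):

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser

    parser = Parser(dialect="duckdb", error_level=ErrorLevel.RAISE, max_errors=5)
    tokens = parser.dialect.tokenize("SELECT 1")          # tokenizer of the resolved dialect
    print(parser.parse(tokens, sql="SELECT 1")[0].sql())  # SELECT 1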
def parse(
    self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens and returns a list of syntax trees, one tree
    per parsed SQL statement.

    Args:
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The list of the produced syntax trees.
    """
    return self._parse(
        parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
    )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
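A sketch of the tokenizer-to-parser handoff with the default dialect; note that parse produces one tree per statement:

    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT 1; SELECT 2"
    trees = Parser().parse(Tokenizer().tokenize(sql), sql=sql)
    print(len(trees))      # 2: one syntax tree per statement
    print(trees[1].sql())  # SELECT 2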
def parse_into(
    self,
    expression_types: exp.IntoType,
    raw_tokens: t.List[Token],
    sql: t.Optional[str] = None,
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens into a given Expression type. If a collection of Expression
    types is given instead, this method will try to parse the token list into each one
    of them, stopping at the first for which the parsing succeeds.

    Args:
        expression_types: The expression type(s) to try and parse the token list into.
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The target Expression.
    """
    errors = []
    for expression_type in ensure_list(expression_types):
        parser = self.EXPRESSION_PARSERS.get(expression_type)
        if not parser:
            raise TypeError(f"No parser registered for {expression_type}")

        try:
            return self._parse(parser, raw_tokens, sql)
        except ParseError as e:
            e.errors[0]["into_expression"] = expression_type
            errors.append(e)

    raise ParseError(
        f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
        errors=merge_errors(errors),
    ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
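A sketch, assuming exp.Condition is among the types registered in EXPRESSION_PARSERS (it backs helpers such as sqlglot.condition); an unregistered type raises TypeError, and a failed parse surfaces as a ParseError carrying the attempted into_expression:

    from sqlglot import exp
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "x > 5 AND y < 10"
    tokens = Tokenizer().tokenize(sql)
    condition = Parser().parse_into(exp.Condition, tokens, sql=sql)[0]
    print(condition.sql())  # x > 5 AND y < 10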
def check_errors(self) -> None:
    """Logs or raises any found errors, depending on the chosen error level setting."""
    if self.error_level == ErrorLevel.WARN:
        for error in self.errors:
            logger.error(str(error))
    elif self.error_level == ErrorLevel.RAISE and self.errors:
        raise ParseError(
            concat_messages(self.errors, self.max_errors),
            errors=merge_errors(self.errors),
        )
Logs or raises any found errors, depending on the chosen error level setting.
def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
    """
    Appends an error in the list of recorded errors or raises it, depending on the chosen
    error level setting.
    """
    token = token or self._curr or self._prev or Token.string("")
    start = token.start
    end = token.end + 1
    start_context = self.sql[max(start - self.error_message_context, 0) : start]
    highlight = self.sql[start:end]
    end_context = self.sql[end : end + self.error_message_context]

    error = ParseError.new(
        f"{message}. Line {token.line}, Col: {token.col}.\n"
        f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
        description=message,
        line=token.line,
        col=token.col,
        start_context=start_context,
        highlight=highlight,
        end_context=end_context,
    )

    if self.error_level == ErrorLevel.IMMEDIATE:
        raise error

    self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
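The levels interact as sketched below ("SELECT (1" is an arbitrary malformed input): IMMEDIATE makes raise_error throw on the spot, while RAISE only records errors until check_errors fires at the end of a parse:

    from sqlglot.errors import ErrorLevel, ParseError
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT (1"  # unbalanced parenthesis
    parser = Parser(error_level=ErrorLevel.RAISE)
    try:
        parser.parse(Tokenizer().tokenize(sql), sql=sql)
    except ParseError as e:
        # each recorded error keeps the context captured by raise_error
        print(e.errors[0]["description"], e.errors[0]["line"], e.errors[0]["col"])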
def expression(
    self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
) -> E:
    """
    Creates a new, validated Expression.

    Args:
        exp_class: The expression class to instantiate.
        comments: An optional list of comments to attach to the expression.
        kwargs: The arguments to set for the expression along with their respective values.

    Returns:
        The target expression.
    """
    instance = exp_class(**kwargs)
    instance.add_comments(comments) if comments else self._add_comments(instance)
    return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
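A sketch of assembling nodes by hand through this helper; with all mandatory arguments supplied it validates and returns the instance unchanged:

    from sqlglot import exp
    from sqlglot.parser import Parser

    parser = Parser()
    column = parser.expression(exp.Column, this=exp.to_identifier("a"))
    eq = parser.expression(exp.EQ, this=column, expression=exp.Literal.number(1))
    print(eq.sql())  # a = 1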
def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
    """
    Validates an Expression, making sure that all its mandatory arguments are set.

    Args:
        expression: The expression to validate.
        args: An optional list of items that was used to instantiate the expression, if it's a Func.

    Returns:
        The validated expression.
    """
    if self.error_level != ErrorLevel.IGNORE:
        for error_message in expression.error_messages(args):
            self.raise_error(error_message)

    return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
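When validation fails, the message is funneled to raise_error. A sketch with exp.Where, whose "this" argument is mandatory (assuming the default arg_types of that node):

    from sqlglot import exp
    from sqlglot.errors import ErrorLevel, ParseError
    from sqlglot.parser import Parser

    parser = Parser(error_level=ErrorLevel.RAISE)
    parser.expression(exp.Where)  # records "Required keyword: 'this' missing ..."
    try:
        parser.check_errors()
    except ParseError as e:
        print(e.errors[0]["description"])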
def parse_set_operation(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    start = self._index
    _, side_token, kind_token = self._parse_join_parts()

    side = side_token.text if side_token else None
    kind = kind_token.text if kind_token else None

    if not self._match_set(self.SET_OPERATIONS):
        self._retreat(start)
        return None

    token_type = self._prev.token_type

    if token_type == TokenType.UNION:
        operation: t.Type[exp.SetOperation] = exp.Union
    elif token_type == TokenType.EXCEPT:
        operation = exp.Except
    else:
        operation = exp.Intersect

    comments = self._prev.comments

    if self._match(TokenType.DISTINCT):
        distinct: t.Optional[bool] = True
    elif self._match(TokenType.ALL):
        distinct = False
    else:
        distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
        if distinct is None:
            self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

    by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq(
        "STRICT", "CORRESPONDING"
    )
    if self._match_text_seq("CORRESPONDING"):
        by_name = True
        if not side and not kind:
            kind = "INNER"

    on_column_list = None
    if by_name and self._match_texts(("ON", "BY")):
        on_column_list = self._parse_wrapped_csv(self._parse_column)

    expression = self._parse_select(nested=True, parse_set_operation=False)

    return self.expression(
        operation,
        comments=comments,
        this=this,
        distinct=distinct,
        by_name=by_name,
        expression=expression,
        side=side,
        kind=kind,
        on=on_column_list,
    )
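A sketch of the nodes this produces; the second statement assumes DuckDB, whose UNION BY NAME syntax sets the by_name flag:

    import sqlglot

    tree = sqlglot.parse_one("SELECT a FROM x UNION ALL SELECT a FROM y")
    print(type(tree).__name__, tree.args.get("distinct"))  # Union False

    tree = sqlglot.parse_one("SELECT a FROM x UNION BY NAME SELECT a FROM y", read="duckdb")
    print(tree.args.get("by_name"))  # True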