sqlglot.parser
from __future__ import annotations

import logging
import typing as t
import itertools
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")
    TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor)

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)
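
# Editor's example (illustrative sketch, not part of the module): build_mod wraps
# binary operands in exp.Paren so operator precedence survives round-tripping.
# Only public sqlglot APIs are assumed here.
def _example_build_mod() -> None:
    from sqlglot import parse_one

    mod = build_mod([parse_one("a + 1"), parse_one("7")])
    # The Add node was parenthesized, so the SQL keeps the intended grouping.
    assert mod.sql() == "(a + 1) % 7"
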
def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True):
    return exp.Trim(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position="LEADING" if is_left else "TRAILING",
    )


def build_coalesce(
    args: t.List, is_nvl: t.Optional[bool] = None, is_null: t.Optional[bool] = None
) -> exp.Coalesce:
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl, is_null=is_null)


def build_locate_strposition(args: t.List):
    return exp.StrPosition(
        this=seq_get(args, 1),
        substr=seq_get(args, 0),
        position=seq_get(args, 2),
    )


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass
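
# Editor's example (illustrative sketch, not part of the module): what the _Parser
# metaclass precomputes. new_trie/in_trie are imported above; multi-word keys such
# as "ISOLATION LEVEL" are split on spaces so commands can be matched word by word.
def _example_keyword_trie() -> None:
    trie = new_trie(key.split(" ") for key in ("ISOLATION LEVEL", "TRANSACTION"))

    result, _ = in_trie(trie, ["ISOLATION"])
    assert result == TrieResult.PREFIX  # a prefix of a longer key

    result, _ = in_trie(trie, ["TRANSACTION"])
    assert result == TrieResult.EXISTS  # a complete key
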
class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "CHAR": lambda args: exp.Chr(expressions=args),
        "CHR": lambda args: exp.Chr(expressions=args),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "STRPOS": exp.StrPosition.from_arg_list,
        "CHARINDEX": lambda args: build_locate_strposition(args),
        "INSTR": exp.StrPosition.from_arg_list,
        "LOCATE": lambda args: build_locate_strposition(args),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }
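
    # Editor's note (illustrative, not part of the module): FUNCTIONS is keyed by
    # uppercase names, so COALESCE, IFNULL and NVL all funnel into build_coalesce:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("IFNULL(x, 0)").sql()
    #     'COALESCE(x, 0)'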

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.DYNAMIC,
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.UDOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.BLOB,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME2,
        TokenType.DATETIME64,
        TokenType.SMALLDATETIME,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.DECIMAL256,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.POINT,
        TokenType.RING,
        TokenType.LINESTRING,
        TokenType.MULTILINESTRING,
        TokenType.POLYGON,
        TokenType.MULTIPOLYGON,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.VOID,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NOTHING,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        TokenType.DYNAMIC,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
        TokenType.DOUBLE: TokenType.UDOUBLE,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.FILE_FORMAT,
        TokenType.MODEL,
        TokenType.NAMESPACE,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.SINK,
        TokenType.SOURCE,
        TokenType.STAGE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.STREAMLIT,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.ATTACH,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.CURRENT_SCHEMA,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DETACH,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.EXPORT,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.GET,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LIMIT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.PUT,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
    ID_VAR_TOKENS.remove(TokenType.UNION)

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS
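
    # Editor's note (illustrative, not part of the module): because a keyword's
    # token appears in ID_VAR_TOKENS, it can still be used as a plain identifier:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT 1 AS filter").expressions[0].alias
    #     'filter'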

    COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_SCHEMA,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GET,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPNTZ,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }
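
    # Editor's note (illustrative, not part of the module): TokenType.ARROW in
    # LAMBDAS turns "x -> ..." function arguments into exp.Lambda nodes, e.g.:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> sqlglot.parse_one("SELECT FILTER(arr, x -> x > 0)").find(exp.Lambda).sql()
    #     'x -> x > 0'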

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DOTCOLON: lambda self, this, to: self.expression(
            exp.JSONCast,
            this=this,
            to=to,
        ),
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Hint: lambda self: self._parse_hint_body(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Tuple: lambda self: self._parse_value(values=False),
        exp.Whens: lambda self: self._parse_when_matched(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }
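
    # Editor's note (illustrative, not part of the module): EXPRESSION_PARSERS is
    # what backs parse_into, also reachable as parse_one(..., into=...):
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> type(sqlglot.parse_one("x = 1 AND y = 2", into=exp.Condition)).__name__
    #     'And'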

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.ANALYZE: lambda self: self._parse_analyze(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self._parse_use(),
        TokenType.SEMICOLON: lambda self: exp.Semicolon(),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(
            exp.HexString,
            this=token.text,
            is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None,
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }
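
    # Editor's note (illustrative, not part of the module): STRING_PARSERS and
    # NUMERIC_PARSERS build leaf literal nodes straight from tokens, e.g.:
    #
    #     >>> import sqlglot
    #     >>> lit = sqlglot.parse_one("SELECT 'abc'").expressions[0]
    #     >>> lit.is_string
    #     True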

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.COLON_PLACEHOLDER_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
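
    # Editor's note (illustrative, not part of the module): RANGE_PARSERS handles
    # postfix predicates such as BETWEEN, IN and IS, e.g.:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> type(sqlglot.parse_one("x BETWEEN 1 AND 2", into=exp.Condition)).__name__
    #     'Between'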
"GLOBAL": lambda self: self.expression(exp.GlobalProperty), 977 "HEAP": lambda self: self.expression(exp.HeapProperty), 978 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 979 "IMMUTABLE": lambda self: self.expression( 980 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 981 ), 982 "INHERITS": lambda self: self.expression( 983 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 984 ), 985 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 986 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 987 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 988 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 989 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 990 "LIKE": lambda self: self._parse_create_like(), 991 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 992 "LOCK": lambda self: self._parse_locking(), 993 "LOCKING": lambda self: self._parse_locking(), 994 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 995 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 996 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 997 "MODIFIES": lambda self: self._parse_modifies_property(), 998 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 999 "NO": lambda self: self._parse_no_property(), 1000 "ON": lambda self: self._parse_on_property(), 1001 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 1002 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 1003 "PARTITION": lambda self: self._parse_partitioned_of(), 1004 "PARTITION BY": lambda self: self._parse_partitioned_by(), 1005 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 1006 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 1007 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 1008 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1009 "READS": lambda self: self._parse_reads_property(), 1010 "REMOTE": lambda self: self._parse_remote_with_connection(), 1011 "RETURNS": lambda self: self._parse_returns(), 1012 "STRICT": lambda self: self.expression(exp.StrictProperty), 1013 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1014 "ROW": lambda self: self._parse_row(), 1015 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1016 "SAMPLE": lambda self: self.expression( 1017 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1018 ), 1019 "SECURE": lambda self: self.expression(exp.SecureProperty), 1020 "SECURITY": lambda self: self._parse_security(), 1021 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1022 "SETTINGS": lambda self: self._parse_settings_property(), 1023 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1024 "SORTKEY": lambda self: self._parse_sortkey(), 1025 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1026 "STABLE": lambda self: self.expression( 1027 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1028 ), 1029 "STORED": lambda self: self._parse_stored(), 1030 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1031 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1032 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1033 "TEMPORARY": lambda self: 
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
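
    # Editor's note (illustrative, not part of the module): PROPERTY_PARSERS maps
    # keywords that follow a CREATE schema to property nodes, e.g.:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> ddl = sqlglot.parse_one("CREATE TABLE t (x INT) ENGINE=InnoDB", read="mysql")
    #     >>> ddl.find(exp.EngineProperty) is not None
    #     True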
"UNIQUE": lambda self: self._parse_unique(), 1106 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1107 "WATERMARK": lambda self: self.expression( 1108 exp.WatermarkColumnConstraint, 1109 this=self._match(TokenType.FOR) and self._parse_column(), 1110 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1111 ), 1112 "WITH": lambda self: self.expression( 1113 exp.Properties, expressions=self._parse_wrapped_properties() 1114 ), 1115 "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1116 "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1117 } 1118 1119 def _parse_partitioned_by_bucket_or_truncate(self) -> exp.Expression: 1120 klass = ( 1121 exp.PartitionedByBucket 1122 if self._prev.text.upper() == "BUCKET" 1123 else exp.PartitionByTruncate 1124 ) 1125 1126 args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column()) 1127 this, expression = seq_get(args, 0), seq_get(args, 1) 1128 1129 if isinstance(this, exp.Literal): 1130 # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order 1131 # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)` 1132 # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)` 1133 # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)` 1134 # 1135 # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning 1136 # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties 1137 this, expression = expression, this 1138 1139 return self.expression(klass, this=this, expression=expression) 1140 1141 ALTER_PARSERS = { 1142 "ADD": lambda self: self._parse_alter_table_add(), 1143 "AS": lambda self: self._parse_select(), 1144 "ALTER": lambda self: self._parse_alter_table_alter(), 1145 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1146 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1147 "DROP": lambda self: self._parse_alter_table_drop(), 1148 "RENAME": lambda self: self._parse_alter_table_rename(), 1149 "SET": lambda self: self._parse_alter_table_set(), 1150 "SWAP": lambda self: self.expression( 1151 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1152 ), 1153 } 1154 1155 ALTER_ALTER_PARSERS = { 1156 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1157 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1158 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1159 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1160 } 1161 1162 SCHEMA_UNNAMED_CONSTRAINTS = { 1163 "CHECK", 1164 "EXCLUDE", 1165 "FOREIGN KEY", 1166 "LIKE", 1167 "PERIOD", 1168 "PRIMARY KEY", 1169 "UNIQUE", 1170 "WATERMARK", 1171 "BUCKET", 1172 "TRUNCATE", 1173 } 1174 1175 NO_PAREN_FUNCTION_PARSERS = { 1176 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1177 "CASE": lambda self: self._parse_case(), 1178 "CONNECT_BY_ROOT": lambda self: self.expression( 1179 exp.ConnectByRoot, this=self._parse_column() 1180 ), 1181 "IF": lambda self: self._parse_if(), 1182 } 1183 1184 INVALID_FUNC_NAME_TOKENS = { 1185 TokenType.IDENTIFIER, 1186 TokenType.STRING, 1187 } 1188 1189 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1190 1191 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1192 1193 

    FUNCTION_PARSERS = {
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names()
        },
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names()
        },
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "NORMALIZE": lambda self: self._parse_normalize(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "OVERLAY": lambda self: self._parse_overlay(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
        "XMLELEMENT": lambda self: self.expression(
            exp.XMLElement,
            this=self._match_text_seq("NAME") and self._parse_id_var(),
            expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression),
        ),
        "XMLTABLE": lambda self: self._parse_xml_table(),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }
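
    # Editor's note (illustrative, not part of the module): FUNCTION_PARSERS covers
    # calls with special argument syntax, such as EXTRACT(unit FROM expr):
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> sqlglot.parse_one("SELECT EXTRACT(YEAR FROM d)").find(exp.Extract).name
    #     'YEAR'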

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

    EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        "USING": (
            "BTREE",
            "HASH",
        ),
        **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()),
    }

    WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = {
        "NO": ("OTHERS",),
        "CURRENT": ("ROW",),
        **dict.fromkeys(("GROUP", "TIES"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    # The style options for the ANALYZE statement
    ANALYZE_STYLES = {
        "BUFFER_USAGE_LIMIT",
        "FULL",
        "LOCAL",
        "NO_WRITE_TO_BINLOG",
        "SAMPLE",
        "SKIP_LOCKED",
        "VERBOSE",
    }

    ANALYZE_EXPRESSION_PARSERS = {
        "ALL": lambda self: self._parse_analyze_columns(),
        "COMPUTE": lambda self: self._parse_analyze_statistics(),
        "DELETE": lambda self: self._parse_analyze_delete(),
        "DROP": lambda self: self._parse_analyze_histogram(),
        "ESTIMATE": lambda self: self._parse_analyze_statistics(),
        "LIST": lambda self: self._parse_analyze_list(),
        "PREDICATE": lambda self: self._parse_analyze_columns(),
        "UPDATE": lambda self: self._parse_analyze_histogram(),
        "VALIDATE": lambda self: self._parse_analyze_validate(),
    }

    PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}

    AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

    OPERATION_MODIFIERS: t.Set[str] = set()

    RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"}

    MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows)

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = True

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
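
    # Editor's note (illustrative, not part of the module): parse() is the
    # tokenize-then-parse flow that Dialect.parse drives under the hood:
    #
    #     >>> from sqlglot.dialects import Dialect
    #     >>> dialect = Dialect.get_or_raise("duckdb")
    #     >>> sql = "SELECT 1; SELECT 2"
    #     >>> len(dialect.parser().parse(dialect.tokenize(sql), sql))
    #     2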

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if token.comments:
                    chunks.append([token])

                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error to the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
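
    # Editor's note (illustrative, not part of the module): with the default
    # ErrorLevel.IMMEDIATE, raise_error raises at once with structured details:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot.errors import ParseError
    #     >>> try:
    #     ...     sqlglot.parse_one("SELECT foo( FROM bar")
    #     ... except ParseError as e:
    #     ...     print(e.errors[0]["line"])
    #     1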
1690 """ 1691 instance = exp_class(**kwargs) 1692 instance.add_comments(comments) if comments else self._add_comments(instance) 1693 return self.validate_expression(instance) 1694 1695 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1696 if expression and self._prev_comments: 1697 expression.add_comments(self._prev_comments) 1698 self._prev_comments = None 1699 1700 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1701 """ 1702 Validates an Expression, making sure that all its mandatory arguments are set. 1703 1704 Args: 1705 expression: The expression to validate. 1706 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1707 1708 Returns: 1709 The validated expression. 1710 """ 1711 if self.error_level != ErrorLevel.IGNORE: 1712 for error_message in expression.error_messages(args): 1713 self.raise_error(error_message) 1714 1715 return expression 1716 1717 def _find_sql(self, start: Token, end: Token) -> str: 1718 return self.sql[start.start : end.end + 1] 1719 1720 def _is_connected(self) -> bool: 1721 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1722 1723 def _advance(self, times: int = 1) -> None: 1724 self._index += times 1725 self._curr = seq_get(self._tokens, self._index) 1726 self._next = seq_get(self._tokens, self._index + 1) 1727 1728 if self._index > 0: 1729 self._prev = self._tokens[self._index - 1] 1730 self._prev_comments = self._prev.comments 1731 else: 1732 self._prev = None 1733 self._prev_comments = None 1734 1735 def _retreat(self, index: int) -> None: 1736 if index != self._index: 1737 self._advance(index - self._index) 1738 1739 def _warn_unsupported(self) -> None: 1740 if len(self._tokens) <= 1: 1741 return 1742 1743 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1744 # interested in emitting a warning for the one being currently processed. 1745 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1746 1747 logger.warning( 1748 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1749 ) 1750 1751 def _parse_command(self) -> exp.Command: 1752 self._warn_unsupported() 1753 return self.expression( 1754 exp.Command, 1755 comments=self._prev_comments, 1756 this=self._prev.text.upper(), 1757 expression=self._parse_string(), 1758 ) 1759 1760 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1761 """ 1762 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
1763 This behavior can be different depending on the uset-set ErrorLevel, so _try_parse aims to 1764 solve this by setting & resetting the parser state accordingly 1765 """ 1766 index = self._index 1767 error_level = self.error_level 1768 1769 self.error_level = ErrorLevel.IMMEDIATE 1770 try: 1771 this = parse_method() 1772 except ParseError: 1773 this = None 1774 finally: 1775 if not this or retreat: 1776 self._retreat(index) 1777 self.error_level = error_level 1778 1779 return this 1780 1781 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1782 start = self._prev 1783 exists = self._parse_exists() if allow_exists else None 1784 1785 self._match(TokenType.ON) 1786 1787 materialized = self._match_text_seq("MATERIALIZED") 1788 kind = self._match_set(self.CREATABLES) and self._prev 1789 if not kind: 1790 return self._parse_as_command(start) 1791 1792 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1793 this = self._parse_user_defined_function(kind=kind.token_type) 1794 elif kind.token_type == TokenType.TABLE: 1795 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1796 elif kind.token_type == TokenType.COLUMN: 1797 this = self._parse_column() 1798 else: 1799 this = self._parse_id_var() 1800 1801 self._match(TokenType.IS) 1802 1803 return self.expression( 1804 exp.Comment, 1805 this=this, 1806 kind=kind.text, 1807 expression=self._parse_string(), 1808 exists=exists, 1809 materialized=materialized, 1810 ) 1811 1812 def _parse_to_table( 1813 self, 1814 ) -> exp.ToTableProperty: 1815 table = self._parse_table_parts(schema=True) 1816 return self.expression(exp.ToTableProperty, this=table) 1817 1818 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1819 def _parse_ttl(self) -> exp.Expression: 1820 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1821 this = self._parse_bitwise() 1822 1823 if self._match_text_seq("DELETE"): 1824 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1825 if self._match_text_seq("RECOMPRESS"): 1826 return self.expression( 1827 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1828 ) 1829 if self._match_text_seq("TO", "DISK"): 1830 return self.expression( 1831 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1832 ) 1833 if self._match_text_seq("TO", "VOLUME"): 1834 return self.expression( 1835 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1836 ) 1837 1838 return this 1839 1840 expressions = self._parse_csv(_parse_ttl_action) 1841 where = self._parse_where() 1842 group = self._parse_group() 1843 1844 aggregates = None 1845 if group and self._match(TokenType.SET): 1846 aggregates = self._parse_csv(self._parse_set_item) 1847 1848 return self.expression( 1849 exp.MergeTreeTTL, 1850 expressions=expressions, 1851 where=where, 1852 group=group, 1853 aggregates=aggregates, 1854 ) 1855 1856 def _parse_statement(self) -> t.Optional[exp.Expression]: 1857 if self._curr is None: 1858 return None 1859 1860 if self._match_set(self.STATEMENT_PARSERS): 1861 comments = self._prev_comments 1862 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 1863 stmt.add_comments(comments, prepend=True) 1864 return stmt 1865 1866 if self._match_set(self.dialect.tokenizer.COMMANDS): 1867 return self._parse_command() 1868 1869 expression = self._parse_expression() 1870 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1871 return self._parse_query_modifiers(expression) 
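# For example, "DROP TABLE t" is dispatched through STATEMENT_PARSERS (TokenType.DROP
# maps to _parse_drop below), while a bare "SELECT 1" takes the fallthrough path above
# via _parse_expression / _parse_select.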
1872 1873 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1874 start = self._prev 1875 temporary = self._match(TokenType.TEMPORARY) 1876 materialized = self._match_text_seq("MATERIALIZED") 1877 1878 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1879 if not kind: 1880 return self._parse_as_command(start) 1881 1882 concurrently = self._match_text_seq("CONCURRENTLY") 1883 if_exists = exists or self._parse_exists() 1884 1885 if kind == "COLUMN": 1886 this = self._parse_column() 1887 else: 1888 this = self._parse_table_parts( 1889 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1890 ) 1891 1892 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1893 1894 if self._match(TokenType.L_PAREN, advance=False): 1895 expressions = self._parse_wrapped_csv(self._parse_types) 1896 else: 1897 expressions = None 1898 1899 return self.expression( 1900 exp.Drop, 1901 exists=if_exists, 1902 this=this, 1903 expressions=expressions, 1904 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1905 temporary=temporary, 1906 materialized=materialized, 1907 cascade=self._match_text_seq("CASCADE"), 1908 constraints=self._match_text_seq("CONSTRAINTS"), 1909 purge=self._match_text_seq("PURGE"), 1910 cluster=cluster, 1911 concurrently=concurrently, 1912 ) 1913 1914 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1915 return ( 1916 self._match_text_seq("IF") 1917 and (not not_ or self._match(TokenType.NOT)) 1918 and self._match(TokenType.EXISTS) 1919 ) 1920 1921 def _parse_create(self) -> exp.Create | exp.Command: 1922 # Note: this can't be None because we've matched a statement parser 1923 start = self._prev 1924 1925 replace = ( 1926 start.token_type == TokenType.REPLACE 1927 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1928 or self._match_pair(TokenType.OR, TokenType.ALTER) 1929 ) 1930 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1931 1932 unique = self._match(TokenType.UNIQUE) 1933 1934 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1935 clustered = True 1936 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1937 "COLUMNSTORE" 1938 ): 1939 clustered = False 1940 else: 1941 clustered = None 1942 1943 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1944 self._advance() 1945 1946 properties = None 1947 create_token = self._match_set(self.CREATABLES) and self._prev 1948 1949 if not create_token: 1950 # exp.Properties.Location.POST_CREATE 1951 properties = self._parse_properties() 1952 create_token = self._match_set(self.CREATABLES) and self._prev 1953 1954 if not properties or not create_token: 1955 return self._parse_as_command(start) 1956 1957 concurrently = self._match_text_seq("CONCURRENTLY") 1958 exists = self._parse_exists(not_=True) 1959 this = None 1960 expression: t.Optional[exp.Expression] = None 1961 indexes = None 1962 no_schema_binding = None 1963 begin = None 1964 end = None 1965 clone = None 1966 1967 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1968 nonlocal properties 1969 if properties and temp_props: 1970 properties.expressions.extend(temp_props.expressions) 1971 elif temp_props: 1972 properties = temp_props 1973 1974 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1975 this = self._parse_user_defined_function(kind=create_token.token_type) 1976 1977 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1978 
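# e.g. the RETURNS clause of a CREATE FUNCTION statement is collected here as an
# exp.ReturnsProperty (see _parse_returns further below)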
extend_props(self._parse_properties()) 1979 1980 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1981 extend_props(self._parse_properties()) 1982 1983 if not expression: 1984 if self._match(TokenType.COMMAND): 1985 expression = self._parse_as_command(self._prev) 1986 else: 1987 begin = self._match(TokenType.BEGIN) 1988 return_ = self._match_text_seq("RETURN") 1989 1990 if self._match(TokenType.STRING, advance=False): 1991 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1992 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1993 expression = self._parse_string() 1994 extend_props(self._parse_properties()) 1995 else: 1996 expression = self._parse_user_defined_function_expression() 1997 1998 end = self._match_text_seq("END") 1999 2000 if return_: 2001 expression = self.expression(exp.Return, this=expression) 2002 elif create_token.token_type == TokenType.INDEX: 2003 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c) 2004 if not self._match(TokenType.ON): 2005 index = self._parse_id_var() 2006 anonymous = False 2007 else: 2008 index = None 2009 anonymous = True 2010 2011 this = self._parse_index(index=index, anonymous=anonymous) 2012 elif create_token.token_type in self.DB_CREATABLES: 2013 table_parts = self._parse_table_parts( 2014 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 2015 ) 2016 2017 # exp.Properties.Location.POST_NAME 2018 self._match(TokenType.COMMA) 2019 extend_props(self._parse_properties(before=True)) 2020 2021 this = self._parse_schema(this=table_parts) 2022 2023 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2024 extend_props(self._parse_properties()) 2025 2026 has_alias = self._match(TokenType.ALIAS) 2027 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2028 # exp.Properties.Location.POST_ALIAS 2029 extend_props(self._parse_properties()) 2030 2031 if create_token.token_type == TokenType.SEQUENCE: 2032 expression = self._parse_types() 2033 extend_props(self._parse_properties()) 2034 else: 2035 expression = self._parse_ddl_select() 2036 2037 # Some dialects also support using a table as an alias instead of a SELECT. 2038 # Here we fall back to this as an alternative.
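# e.g. a statement of the form CREATE TABLE t1 AS t2, where t2 is an existing table
# rather than a subquery (illustrative)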
2039 if not expression and has_alias: 2040 expression = self._try_parse(self._parse_table_parts) 2041 2042 if create_token.token_type == TokenType.TABLE: 2043 # exp.Properties.Location.POST_EXPRESSION 2044 extend_props(self._parse_properties()) 2045 2046 indexes = [] 2047 while True: 2048 index = self._parse_index() 2049 2050 # exp.Properties.Location.POST_INDEX 2051 extend_props(self._parse_properties()) 2052 if not index: 2053 break 2054 else: 2055 self._match(TokenType.COMMA) 2056 indexes.append(index) 2057 elif create_token.token_type == TokenType.VIEW: 2058 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2059 no_schema_binding = True 2060 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2061 extend_props(self._parse_properties()) 2062 2063 shallow = self._match_text_seq("SHALLOW") 2064 2065 if self._match_texts(self.CLONE_KEYWORDS): 2066 copy = self._prev.text.lower() == "copy" 2067 clone = self.expression( 2068 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2069 ) 2070 2071 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2072 return self._parse_as_command(start) 2073 2074 create_kind_text = create_token.text.upper() 2075 return self.expression( 2076 exp.Create, 2077 this=this, 2078 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2079 replace=replace, 2080 refresh=refresh, 2081 unique=unique, 2082 expression=expression, 2083 exists=exists, 2084 properties=properties, 2085 indexes=indexes, 2086 no_schema_binding=no_schema_binding, 2087 begin=begin, 2088 end=end, 2089 clone=clone, 2090 concurrently=concurrently, 2091 clustered=clustered, 2092 ) 2093 2094 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2095 seq = exp.SequenceProperties() 2096 2097 options = [] 2098 index = self._index 2099 2100 while self._curr: 2101 self._match(TokenType.COMMA) 2102 if self._match_text_seq("INCREMENT"): 2103 self._match_text_seq("BY") 2104 self._match_text_seq("=") 2105 seq.set("increment", self._parse_term()) 2106 elif self._match_text_seq("MINVALUE"): 2107 seq.set("minvalue", self._parse_term()) 2108 elif self._match_text_seq("MAXVALUE"): 2109 seq.set("maxvalue", self._parse_term()) 2110 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2111 self._match_text_seq("=") 2112 seq.set("start", self._parse_term()) 2113 elif self._match_text_seq("CACHE"): 2114 # T-SQL allows empty CACHE which is initialized dynamically 2115 seq.set("cache", self._parse_number() or True) 2116 elif self._match_text_seq("OWNED", "BY"): 2117 # "OWNED BY NONE" is the default 2118 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2119 else: 2120 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2121 if opt: 2122 options.append(opt) 2123 else: 2124 break 2125 2126 seq.set("options", options if options else None) 2127 return None if self._index == index else seq 2128 2129 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2130 # only used for teradata currently 2131 self._match(TokenType.COMMA) 2132 2133 kwargs = { 2134 "no": self._match_text_seq("NO"), 2135 "dual": self._match_text_seq("DUAL"), 2136 "before": self._match_text_seq("BEFORE"), 2137 "default": self._match_text_seq("DEFAULT"), 2138 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2139 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2140 "after": self._match_text_seq("AFTER"), 2141 "minimum": 
self._match_texts(("MIN", "MINIMUM")), 2142 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2143 } 2144 2145 if self._match_texts(self.PROPERTY_PARSERS): 2146 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2147 try: 2148 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2149 except TypeError: 2150 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2151 2152 return None 2153 2154 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2155 return self._parse_wrapped_csv(self._parse_property) 2156 2157 def _parse_property(self) -> t.Optional[exp.Expression]: 2158 if self._match_texts(self.PROPERTY_PARSERS): 2159 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2160 2161 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2162 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2163 2164 if self._match_text_seq("COMPOUND", "SORTKEY"): 2165 return self._parse_sortkey(compound=True) 2166 2167 if self._match_text_seq("SQL", "SECURITY"): 2168 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2169 2170 index = self._index 2171 key = self._parse_column() 2172 2173 if not self._match(TokenType.EQ): 2174 self._retreat(index) 2175 return self._parse_sequence_properties() 2176 2177 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2178 if isinstance(key, exp.Column): 2179 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2180 2181 value = self._parse_bitwise() or self._parse_var(any_token=True) 2182 2183 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2184 if isinstance(value, exp.Column): 2185 value = exp.var(value.name) 2186 2187 return self.expression(exp.Property, this=key, value=value) 2188 2189 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2190 if self._match_text_seq("BY"): 2191 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2192 2193 self._match(TokenType.ALIAS) 2194 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2195 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2196 2197 return self.expression( 2198 exp.FileFormatProperty, 2199 this=( 2200 self.expression( 2201 exp.InputOutputFormat, 2202 input_format=input_format, 2203 output_format=output_format, 2204 ) 2205 if input_format or output_format 2206 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2207 ), 2208 ) 2209 2210 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2211 field = self._parse_field() 2212 if isinstance(field, exp.Identifier) and not field.quoted: 2213 field = exp.var(field) 2214 2215 return field 2216 2217 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2218 self._match(TokenType.EQ) 2219 self._match(TokenType.ALIAS) 2220 2221 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2222 2223 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2224 properties = [] 2225 while True: 2226 if before: 2227 prop = self._parse_property_before() 2228 else: 2229 prop = self._parse_property() 2230 if not prop: 2231 break 2232 for p in ensure_list(prop): 2233 properties.append(p) 2234 2235 if properties: 2236 return self.expression(exp.Properties, expressions=properties) 2237 2238 return None 2239 2240 
def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2241 return self.expression( 2242 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2243 ) 2244 2245 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2246 if self._match_texts(("NONE", "DEFINER", "INVOKER")): 2247 security_specifier = self._prev.text.upper() 2248 return self.expression(exp.SecurityProperty, this=security_specifier) 2249 return None 2250 2251 def _parse_settings_property(self) -> exp.SettingsProperty: 2252 return self.expression( 2253 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2254 ) 2255 2256 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2257 if self._index >= 2: 2258 pre_volatile_token = self._tokens[self._index - 2] 2259 else: 2260 pre_volatile_token = None 2261 2262 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2263 return exp.VolatileProperty() 2264 2265 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2266 2267 def _parse_retention_period(self) -> exp.Var: 2268 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2269 number = self._parse_number() 2270 number_str = f"{number} " if number else "" 2271 unit = self._parse_var(any_token=True) 2272 return exp.var(f"{number_str}{unit}") 2273 2274 def _parse_system_versioning_property( 2275 self, with_: bool = False 2276 ) -> exp.WithSystemVersioningProperty: 2277 self._match(TokenType.EQ) 2278 prop = self.expression( 2279 exp.WithSystemVersioningProperty, 2280 **{ # type: ignore 2281 "on": True, 2282 "with": with_, 2283 }, 2284 ) 2285 2286 if self._match_text_seq("OFF"): 2287 prop.set("on", False) 2288 return prop 2289 2290 self._match(TokenType.ON) 2291 if self._match(TokenType.L_PAREN): 2292 while self._curr and not self._match(TokenType.R_PAREN): 2293 if self._match_text_seq("HISTORY_TABLE", "="): 2294 prop.set("this", self._parse_table_parts()) 2295 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2296 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2297 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2298 prop.set("retention_period", self._parse_retention_period()) 2299 2300 self._match(TokenType.COMMA) 2301 2302 return prop 2303 2304 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2305 self._match(TokenType.EQ) 2306 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2307 prop = self.expression(exp.DataDeletionProperty, on=on) 2308 2309 if self._match(TokenType.L_PAREN): 2310 while self._curr and not self._match(TokenType.R_PAREN): 2311 if self._match_text_seq("FILTER_COLUMN", "="): 2312 prop.set("filter_column", self._parse_column()) 2313 elif self._match_text_seq("RETENTION_PERIOD", "="): 2314 prop.set("retention_period", self._parse_retention_period()) 2315 2316 self._match(TokenType.COMMA) 2317 2318 return prop 2319 2320 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2321 kind = "HASH" 2322 expressions: t.Optional[t.List[exp.Expression]] = None 2323 if self._match_text_seq("BY", "HASH"): 2324 expressions = self._parse_wrapped_csv(self._parse_id_var) 2325 elif self._match_text_seq("BY", "RANDOM"): 2326 kind = "RANDOM" 2327 2328 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2329 buckets: t.Optional[exp.Expression] = None 2330 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2331 
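# e.g. DISTRIBUTED BY HASH (k) BUCKETS 16 yields buckets = 16; with BUCKETS AUTO, buckets is left unset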
buckets = self._parse_number() 2332 2333 return self.expression( 2334 exp.DistributedByProperty, 2335 expressions=expressions, 2336 kind=kind, 2337 buckets=buckets, 2338 order=self._parse_order(), 2339 ) 2340 2341 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2342 self._match_text_seq("KEY") 2343 expressions = self._parse_wrapped_id_vars() 2344 return self.expression(expr_type, expressions=expressions) 2345 2346 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2347 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2348 prop = self._parse_system_versioning_property(with_=True) 2349 self._match_r_paren() 2350 return prop 2351 2352 if self._match(TokenType.L_PAREN, advance=False): 2353 return self._parse_wrapped_properties() 2354 2355 if self._match_text_seq("JOURNAL"): 2356 return self._parse_withjournaltable() 2357 2358 if self._match_texts(self.VIEW_ATTRIBUTES): 2359 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2360 2361 if self._match_text_seq("DATA"): 2362 return self._parse_withdata(no=False) 2363 elif self._match_text_seq("NO", "DATA"): 2364 return self._parse_withdata(no=True) 2365 2366 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2367 return self._parse_serde_properties(with_=True) 2368 2369 if self._match(TokenType.SCHEMA): 2370 return self.expression( 2371 exp.WithSchemaBindingProperty, 2372 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2373 ) 2374 2375 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2376 return self.expression( 2377 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2378 ) 2379 2380 if not self._next: 2381 return None 2382 2383 return self._parse_withisolatedloading() 2384 2385 def _parse_procedure_option(self) -> exp.Expression | None: 2386 if self._match_text_seq("EXECUTE", "AS"): 2387 return self.expression( 2388 exp.ExecuteAsProperty, 2389 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2390 or self._parse_string(), 2391 ) 2392 2393 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2394 2395 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2396 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2397 self._match(TokenType.EQ) 2398 2399 user = self._parse_id_var() 2400 self._match(TokenType.PARAMETER) 2401 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2402 2403 if not user or not host: 2404 return None 2405 2406 return exp.DefinerProperty(this=f"{user}@{host}") 2407 2408 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2409 self._match(TokenType.TABLE) 2410 self._match(TokenType.EQ) 2411 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2412 2413 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2414 return self.expression(exp.LogProperty, no=no) 2415 2416 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2417 return self.expression(exp.JournalProperty, **kwargs) 2418 2419 def _parse_checksum(self) -> exp.ChecksumProperty: 2420 self._match(TokenType.EQ) 2421 2422 on = None 2423 if self._match(TokenType.ON): 2424 on = True 2425 elif self._match_text_seq("OFF"): 2426 on = False 2427 2428 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2429 2430 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2431 return self.expression( 2432 exp.Cluster, 2433 expressions=( 2434 
self._parse_wrapped_csv(self._parse_ordered) 2435 if wrapped 2436 else self._parse_csv(self._parse_ordered) 2437 ), 2438 ) 2439 2440 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2441 self._match_text_seq("BY") 2442 2443 self._match_l_paren() 2444 expressions = self._parse_csv(self._parse_column) 2445 self._match_r_paren() 2446 2447 if self._match_text_seq("SORTED", "BY"): 2448 self._match_l_paren() 2449 sorted_by = self._parse_csv(self._parse_ordered) 2450 self._match_r_paren() 2451 else: 2452 sorted_by = None 2453 2454 self._match(TokenType.INTO) 2455 buckets = self._parse_number() 2456 self._match_text_seq("BUCKETS") 2457 2458 return self.expression( 2459 exp.ClusteredByProperty, 2460 expressions=expressions, 2461 sorted_by=sorted_by, 2462 buckets=buckets, 2463 ) 2464 2465 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2466 if not self._match_text_seq("GRANTS"): 2467 self._retreat(self._index - 1) 2468 return None 2469 2470 return self.expression(exp.CopyGrantsProperty) 2471 2472 def _parse_freespace(self) -> exp.FreespaceProperty: 2473 self._match(TokenType.EQ) 2474 return self.expression( 2475 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2476 ) 2477 2478 def _parse_mergeblockratio( 2479 self, no: bool = False, default: bool = False 2480 ) -> exp.MergeBlockRatioProperty: 2481 if self._match(TokenType.EQ): 2482 return self.expression( 2483 exp.MergeBlockRatioProperty, 2484 this=self._parse_number(), 2485 percent=self._match(TokenType.PERCENT), 2486 ) 2487 2488 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2489 2490 def _parse_datablocksize( 2491 self, 2492 default: t.Optional[bool] = None, 2493 minimum: t.Optional[bool] = None, 2494 maximum: t.Optional[bool] = None, 2495 ) -> exp.DataBlocksizeProperty: 2496 self._match(TokenType.EQ) 2497 size = self._parse_number() 2498 2499 units = None 2500 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2501 units = self._prev.text 2502 2503 return self.expression( 2504 exp.DataBlocksizeProperty, 2505 size=size, 2506 units=units, 2507 default=default, 2508 minimum=minimum, 2509 maximum=maximum, 2510 ) 2511 2512 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2513 self._match(TokenType.EQ) 2514 always = self._match_text_seq("ALWAYS") 2515 manual = self._match_text_seq("MANUAL") 2516 never = self._match_text_seq("NEVER") 2517 default = self._match_text_seq("DEFAULT") 2518 2519 autotemp = None 2520 if self._match_text_seq("AUTOTEMP"): 2521 autotemp = self._parse_schema() 2522 2523 return self.expression( 2524 exp.BlockCompressionProperty, 2525 always=always, 2526 manual=manual, 2527 never=never, 2528 default=default, 2529 autotemp=autotemp, 2530 ) 2531 2532 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2533 index = self._index 2534 no = self._match_text_seq("NO") 2535 concurrent = self._match_text_seq("CONCURRENT") 2536 2537 if not self._match_text_seq("ISOLATED", "LOADING"): 2538 self._retreat(index) 2539 return None 2540 2541 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2542 return self.expression( 2543 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2544 ) 2545 2546 def _parse_locking(self) -> exp.LockingProperty: 2547 if self._match(TokenType.TABLE): 2548 kind = "TABLE" 2549 elif self._match(TokenType.VIEW): 2550 kind = "VIEW" 2551 elif self._match(TokenType.ROW): 2552 kind = "ROW" 2553 elif 
self._match_text_seq("DATABASE"): 2554 kind = "DATABASE" 2555 else: 2556 kind = None 2557 2558 if kind in ("DATABASE", "TABLE", "VIEW"): 2559 this = self._parse_table_parts() 2560 else: 2561 this = None 2562 2563 if self._match(TokenType.FOR): 2564 for_or_in = "FOR" 2565 elif self._match(TokenType.IN): 2566 for_or_in = "IN" 2567 else: 2568 for_or_in = None 2569 2570 if self._match_text_seq("ACCESS"): 2571 lock_type = "ACCESS" 2572 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2573 lock_type = "EXCLUSIVE" 2574 elif self._match_text_seq("SHARE"): 2575 lock_type = "SHARE" 2576 elif self._match_text_seq("READ"): 2577 lock_type = "READ" 2578 elif self._match_text_seq("WRITE"): 2579 lock_type = "WRITE" 2580 elif self._match_text_seq("CHECKSUM"): 2581 lock_type = "CHECKSUM" 2582 else: 2583 lock_type = None 2584 2585 override = self._match_text_seq("OVERRIDE") 2586 2587 return self.expression( 2588 exp.LockingProperty, 2589 this=this, 2590 kind=kind, 2591 for_or_in=for_or_in, 2592 lock_type=lock_type, 2593 override=override, 2594 ) 2595 2596 def _parse_partition_by(self) -> t.List[exp.Expression]: 2597 if self._match(TokenType.PARTITION_BY): 2598 return self._parse_csv(self._parse_assignment) 2599 return [] 2600 2601 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2602 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2603 if self._match_text_seq("MINVALUE"): 2604 return exp.var("MINVALUE") 2605 if self._match_text_seq("MAXVALUE"): 2606 return exp.var("MAXVALUE") 2607 return self._parse_bitwise() 2608 2609 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2610 expression = None 2611 from_expressions = None 2612 to_expressions = None 2613 2614 if self._match(TokenType.IN): 2615 this = self._parse_wrapped_csv(self._parse_bitwise) 2616 elif self._match(TokenType.FROM): 2617 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2618 self._match_text_seq("TO") 2619 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2620 elif self._match_text_seq("WITH", "(", "MODULUS"): 2621 this = self._parse_number() 2622 self._match_text_seq(",", "REMAINDER") 2623 expression = self._parse_number() 2624 self._match_r_paren() 2625 else: 2626 self.raise_error("Failed to parse partition bound spec.") 2627 2628 return self.expression( 2629 exp.PartitionBoundSpec, 2630 this=this, 2631 expression=expression, 2632 from_expressions=from_expressions, 2633 to_expressions=to_expressions, 2634 ) 2635 2636 # https://www.postgresql.org/docs/current/sql-createtable.html 2637 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2638 if not self._match_text_seq("OF"): 2639 self._retreat(self._index - 1) 2640 return None 2641 2642 this = self._parse_table(schema=True) 2643 2644 if self._match(TokenType.DEFAULT): 2645 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2646 elif self._match_text_seq("FOR", "VALUES"): 2647 expression = self._parse_partition_bound_spec() 2648 else: 2649 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2650 2651 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2652 2653 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2654 self._match(TokenType.EQ) 2655 return self.expression( 2656 exp.PartitionedByProperty, 2657 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2658 ) 2659 2660 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2661 if self._match_text_seq("AND", "STATISTICS"): 2662 
statistics = True 2663 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2664 statistics = False 2665 else: 2666 statistics = None 2667 2668 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2669 2670 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2671 if self._match_text_seq("SQL"): 2672 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2673 return None 2674 2675 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2676 if self._match_text_seq("SQL", "DATA"): 2677 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2678 return None 2679 2680 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2681 if self._match_text_seq("PRIMARY", "INDEX"): 2682 return exp.NoPrimaryIndexProperty() 2683 if self._match_text_seq("SQL"): 2684 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2685 return None 2686 2687 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2688 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2689 return exp.OnCommitProperty() 2690 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2691 return exp.OnCommitProperty(delete=True) 2692 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2693 2694 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2695 if self._match_text_seq("SQL", "DATA"): 2696 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2697 return None 2698 2699 def _parse_distkey(self) -> exp.DistKeyProperty: 2700 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2701 2702 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2703 table = self._parse_table(schema=True) 2704 2705 options = [] 2706 while self._match_texts(("INCLUDING", "EXCLUDING")): 2707 this = self._prev.text.upper() 2708 2709 id_var = self._parse_id_var() 2710 if not id_var: 2711 return None 2712 2713 options.append( 2714 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2715 ) 2716 2717 return self.expression(exp.LikeProperty, this=table, expressions=options) 2718 2719 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2720 return self.expression( 2721 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2722 ) 2723 2724 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2725 self._match(TokenType.EQ) 2726 return self.expression( 2727 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2728 ) 2729 2730 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2731 self._match_text_seq("WITH", "CONNECTION") 2732 return self.expression( 2733 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2734 ) 2735 2736 def _parse_returns(self) -> exp.ReturnsProperty: 2737 value: t.Optional[exp.Expression] 2738 null = None 2739 is_table = self._match(TokenType.TABLE) 2740 2741 if is_table: 2742 if self._match(TokenType.LT): 2743 value = self.expression( 2744 exp.Schema, 2745 this="TABLE", 2746 expressions=self._parse_csv(self._parse_struct_types), 2747 ) 2748 if not self._match(TokenType.GT): 2749 self.raise_error("Expecting >") 2750 else: 2751 value = self._parse_schema(exp.var("TABLE")) 2752 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2753 null = True 2754 value = None 2755 else: 2756 value = self._parse_types() 2757 2758 return 
self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2759 2760 def _parse_describe(self) -> exp.Describe: 2761 kind = self._match_set(self.CREATABLES) and self._prev.text 2762 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2763 if self._match(TokenType.DOT): 2764 style = None 2765 self._retreat(self._index - 2) 2766 2767 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2768 2769 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2770 this = self._parse_statement() 2771 else: 2772 this = self._parse_table(schema=True) 2773 2774 properties = self._parse_properties() 2775 expressions = properties.expressions if properties else None 2776 partition = self._parse_partition() 2777 return self.expression( 2778 exp.Describe, 2779 this=this, 2780 style=style, 2781 kind=kind, 2782 expressions=expressions, 2783 partition=partition, 2784 format=format, 2785 ) 2786 2787 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2788 kind = self._prev.text.upper() 2789 expressions = [] 2790 2791 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2792 if self._match(TokenType.WHEN): 2793 expression = self._parse_disjunction() 2794 self._match(TokenType.THEN) 2795 else: 2796 expression = None 2797 2798 else_ = self._match(TokenType.ELSE) 2799 2800 if not self._match(TokenType.INTO): 2801 return None 2802 2803 return self.expression( 2804 exp.ConditionalInsert, 2805 this=self.expression( 2806 exp.Insert, 2807 this=self._parse_table(schema=True), 2808 expression=self._parse_derived_table_values(), 2809 ), 2810 expression=expression, 2811 else_=else_, 2812 ) 2813 2814 expression = parse_conditional_insert() 2815 while expression is not None: 2816 expressions.append(expression) 2817 expression = parse_conditional_insert() 2818 2819 return self.expression( 2820 exp.MultitableInserts, 2821 kind=kind, 2822 comments=comments, 2823 expressions=expressions, 2824 source=self._parse_table(), 2825 ) 2826 2827 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2828 comments = [] 2829 hint = self._parse_hint() 2830 overwrite = self._match(TokenType.OVERWRITE) 2831 ignore = self._match(TokenType.IGNORE) 2832 local = self._match_text_seq("LOCAL") 2833 alternative = None 2834 is_function = None 2835 2836 if self._match_text_seq("DIRECTORY"): 2837 this: t.Optional[exp.Expression] = self.expression( 2838 exp.Directory, 2839 this=self._parse_var_or_string(), 2840 local=local, 2841 row_format=self._parse_row_format(match_row=True), 2842 ) 2843 else: 2844 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2845 comments += ensure_list(self._prev_comments) 2846 return self._parse_multitable_inserts(comments) 2847 2848 if self._match(TokenType.OR): 2849 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2850 2851 self._match(TokenType.INTO) 2852 comments += ensure_list(self._prev_comments) 2853 self._match(TokenType.TABLE) 2854 is_function = self._match(TokenType.FUNCTION) 2855 2856 this = ( 2857 self._parse_table(schema=True, parse_partition=True) 2858 if not is_function 2859 else self._parse_function() 2860 ) 2861 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2862 this.set("alias", self._parse_table_alias()) 2863 2864 returning = self._parse_returning() 2865 2866 return self.expression( 2867 exp.Insert, 2868 comments=comments, 2869 hint=hint, 2870 is_function=is_function, 2871 this=this, 
2872 stored=self._match_text_seq("STORED") and self._parse_stored(), 2873 by_name=self._match_text_seq("BY", "NAME"), 2874 exists=self._parse_exists(), 2875 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2876 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2877 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2878 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2879 conflict=self._parse_on_conflict(), 2880 returning=returning or self._parse_returning(), 2881 overwrite=overwrite, 2882 alternative=alternative, 2883 ignore=ignore, 2884 source=self._match(TokenType.TABLE) and self._parse_table(), 2885 ) 2886 2887 def _parse_kill(self) -> exp.Kill: 2888 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2889 2890 return self.expression( 2891 exp.Kill, 2892 this=self._parse_primary(), 2893 kind=kind, 2894 ) 2895 2896 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2897 conflict = self._match_text_seq("ON", "CONFLICT") 2898 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2899 2900 if not conflict and not duplicate: 2901 return None 2902 2903 conflict_keys = None 2904 constraint = None 2905 2906 if conflict: 2907 if self._match_text_seq("ON", "CONSTRAINT"): 2908 constraint = self._parse_id_var() 2909 elif self._match(TokenType.L_PAREN): 2910 conflict_keys = self._parse_csv(self._parse_id_var) 2911 self._match_r_paren() 2912 2913 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2914 if self._prev.token_type == TokenType.UPDATE: 2915 self._match(TokenType.SET) 2916 expressions = self._parse_csv(self._parse_equality) 2917 else: 2918 expressions = None 2919 2920 return self.expression( 2921 exp.OnConflict, 2922 duplicate=duplicate, 2923 expressions=expressions, 2924 action=action, 2925 conflict_keys=conflict_keys, 2926 constraint=constraint, 2927 where=self._parse_where(), 2928 ) 2929 2930 def _parse_returning(self) -> t.Optional[exp.Returning]: 2931 if not self._match(TokenType.RETURNING): 2932 return None 2933 return self.expression( 2934 exp.Returning, 2935 expressions=self._parse_csv(self._parse_expression), 2936 into=self._match(TokenType.INTO) and self._parse_table_part(), 2937 ) 2938 2939 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2940 if not self._match(TokenType.FORMAT): 2941 return None 2942 return self._parse_row_format() 2943 2944 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2945 index = self._index 2946 with_ = with_ or self._match_text_seq("WITH") 2947 2948 if not self._match(TokenType.SERDE_PROPERTIES): 2949 self._retreat(index) 2950 return None 2951 return self.expression( 2952 exp.SerdeProperties, 2953 **{ # type: ignore 2954 "expressions": self._parse_wrapped_properties(), 2955 "with": with_, 2956 }, 2957 ) 2958 2959 def _parse_row_format( 2960 self, match_row: bool = False 2961 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2962 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2963 return None 2964 2965 if self._match_text_seq("SERDE"): 2966 this = self._parse_string() 2967 2968 serde_properties = self._parse_serde_properties() 2969 2970 return self.expression( 2971 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2972 ) 2973 2974 self._match_text_seq("DELIMITED") 2975 2976 kwargs = {} 2977 2978 if 
self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2979 kwargs["fields"] = self._parse_string() 2980 if self._match_text_seq("ESCAPED", "BY"): 2981 kwargs["escaped"] = self._parse_string() 2982 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2983 kwargs["collection_items"] = self._parse_string() 2984 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2985 kwargs["map_keys"] = self._parse_string() 2986 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2987 kwargs["lines"] = self._parse_string() 2988 if self._match_text_seq("NULL", "DEFINED", "AS"): 2989 kwargs["null"] = self._parse_string() 2990 2991 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2992 2993 def _parse_load(self) -> exp.LoadData | exp.Command: 2994 if self._match_text_seq("DATA"): 2995 local = self._match_text_seq("LOCAL") 2996 self._match_text_seq("INPATH") 2997 inpath = self._parse_string() 2998 overwrite = self._match(TokenType.OVERWRITE) 2999 self._match_pair(TokenType.INTO, TokenType.TABLE) 3000 3001 return self.expression( 3002 exp.LoadData, 3003 this=self._parse_table(schema=True), 3004 local=local, 3005 overwrite=overwrite, 3006 inpath=inpath, 3007 partition=self._parse_partition(), 3008 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3009 serde=self._match_text_seq("SERDE") and self._parse_string(), 3010 ) 3011 return self._parse_as_command(self._prev) 3012 3013 def _parse_delete(self) -> exp.Delete: 3014 # This handles MySQL's "Multiple-Table Syntax" 3015 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3016 tables = None 3017 if not self._match(TokenType.FROM, advance=False): 3018 tables = self._parse_csv(self._parse_table) or None 3019 3020 returning = self._parse_returning() 3021 3022 return self.expression( 3023 exp.Delete, 3024 tables=tables, 3025 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3026 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3027 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3028 where=self._parse_where(), 3029 returning=returning or self._parse_returning(), 3030 limit=self._parse_limit(), 3031 ) 3032 3033 def _parse_update(self) -> exp.Update: 3034 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 3035 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 3036 returning = self._parse_returning() 3037 return self.expression( 3038 exp.Update, 3039 **{ # type: ignore 3040 "this": this, 3041 "expressions": expressions, 3042 "from": self._parse_from(joins=True), 3043 "where": self._parse_where(), 3044 "returning": returning or self._parse_returning(), 3045 "order": self._parse_order(), 3046 "limit": self._parse_limit(), 3047 }, 3048 ) 3049 3050 def _parse_use(self) -> exp.Use: 3051 return self.expression( 3052 exp.Use, 3053 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3054 this=self._parse_table(schema=False), 3055 ) 3056 3057 def _parse_uncache(self) -> exp.Uncache: 3058 if not self._match(TokenType.TABLE): 3059 self.raise_error("Expecting TABLE after UNCACHE") 3060 3061 return self.expression( 3062 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3063 ) 3064 3065 def _parse_cache(self) -> exp.Cache: 3066 lazy = self._match_text_seq("LAZY") 3067 self._match(TokenType.TABLE) 3068 table = self._parse_table(schema=True) 3069 3070 options = [] 3071 if self._match_text_seq("OPTIONS"): 3072 self._match_l_paren() 3073 k = 
self._parse_string() 3074 self._match(TokenType.EQ) 3075 v = self._parse_string() 3076 options = [k, v] 3077 self._match_r_paren() 3078 3079 self._match(TokenType.ALIAS) 3080 return self.expression( 3081 exp.Cache, 3082 this=table, 3083 lazy=lazy, 3084 options=options, 3085 expression=self._parse_select(nested=True), 3086 ) 3087 3088 def _parse_partition(self) -> t.Optional[exp.Partition]: 3089 if not self._match_texts(self.PARTITION_KEYWORDS): 3090 return None 3091 3092 return self.expression( 3093 exp.Partition, 3094 subpartition=self._prev.text.upper() == "SUBPARTITION", 3095 expressions=self._parse_wrapped_csv(self._parse_assignment), 3096 ) 3097 3098 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3099 def _parse_value_expression() -> t.Optional[exp.Expression]: 3100 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3101 return exp.var(self._prev.text.upper()) 3102 return self._parse_expression() 3103 3104 if self._match(TokenType.L_PAREN): 3105 expressions = self._parse_csv(_parse_value_expression) 3106 self._match_r_paren() 3107 return self.expression(exp.Tuple, expressions=expressions) 3108 3109 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 3110 expression = self._parse_expression() 3111 if expression: 3112 return self.expression(exp.Tuple, expressions=[expression]) 3113 return None 3114 3115 def _parse_projections(self) -> t.List[exp.Expression]: 3116 return self._parse_expressions() 3117 3118 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3119 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3120 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3121 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3122 ) 3123 elif self._match(TokenType.FROM): 3124 from_ = self._parse_from(skip_from_token=True) 3125 # Support parentheses for duckdb FROM-first syntax 3126 select = self._parse_select() 3127 if select: 3128 select.set("from", from_) 3129 this = select 3130 else: 3131 this = exp.select("*").from_(t.cast(exp.From, from_)) 3132 else: 3133 this = ( 3134 self._parse_table() 3135 if table 3136 else self._parse_select(nested=True, parse_set_operation=False) 3137 ) 3138 3139 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3140 # in case a modifier (e.g. 
join) is following 3141 if table and isinstance(this, exp.Values) and this.alias: 3142 alias = this.args["alias"].pop() 3143 this = exp.Table(this=this, alias=alias) 3144 3145 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3146 3147 return this 3148 3149 def _parse_select( 3150 self, 3151 nested: bool = False, 3152 table: bool = False, 3153 parse_subquery_alias: bool = True, 3154 parse_set_operation: bool = True, 3155 ) -> t.Optional[exp.Expression]: 3156 cte = self._parse_with() 3157 3158 if cte: 3159 this = self._parse_statement() 3160 3161 if not this: 3162 self.raise_error("Failed to parse any statement following CTE") 3163 return cte 3164 3165 if "with" in this.arg_types: 3166 this.set("with", cte) 3167 else: 3168 self.raise_error(f"{this.key} does not support CTE") 3169 this = cte 3170 3171 return this 3172 3173 # duckdb supports leading with FROM x 3174 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 3175 3176 if self._match(TokenType.SELECT): 3177 comments = self._prev_comments 3178 3179 hint = self._parse_hint() 3180 3181 if self._next and not self._next.token_type == TokenType.DOT: 3182 all_ = self._match(TokenType.ALL) 3183 distinct = self._match_set(self.DISTINCT_TOKENS) 3184 else: 3185 all_, distinct = None, None 3186 3187 kind = ( 3188 self._match(TokenType.ALIAS) 3189 and self._match_texts(("STRUCT", "VALUE")) 3190 and self._prev.text.upper() 3191 ) 3192 3193 if distinct: 3194 distinct = self.expression( 3195 exp.Distinct, 3196 on=self._parse_value(values=False) if self._match(TokenType.ON) else None, 3197 ) 3198 3199 if all_ and distinct: 3200 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3201 3202 operation_modifiers = [] 3203 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3204 operation_modifiers.append(exp.var(self._prev.text.upper())) 3205 3206 limit = self._parse_limit(top=True) 3207 projections = self._parse_projections() 3208 3209 this = self.expression( 3210 exp.Select, 3211 kind=kind, 3212 hint=hint, 3213 distinct=distinct, 3214 expressions=projections, 3215 limit=limit, 3216 operation_modifiers=operation_modifiers or None, 3217 ) 3218 this.comments = comments 3219 3220 into = self._parse_into() 3221 if into: 3222 this.set("into", into) 3223 3224 if not from_: 3225 from_ = self._parse_from() 3226 3227 if from_: 3228 this.set("from", from_) 3229 3230 this = self._parse_query_modifiers(this) 3231 elif (table or nested) and self._match(TokenType.L_PAREN): 3232 this = self._parse_wrapped_select(table=table) 3233 3234 # We return early here so that the UNION isn't attached to the subquery by the 3235 # following call to _parse_set_operations, but instead becomes the parent node 3236 self._match_r_paren() 3237 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3238 elif self._match(TokenType.VALUES, advance=False): 3239 this = self._parse_derived_table_values() 3240 elif from_: 3241 this = exp.select("*").from_(from_.this, copy=False) 3242 elif self._match(TokenType.SUMMARIZE): 3243 table = self._match(TokenType.TABLE) 3244 this = self._parse_select() or self._parse_string() or self._parse_table() 3245 return self.expression(exp.Summarize, this=this, table=table) 3246 elif self._match(TokenType.DESCRIBE): 3247 this = self._parse_describe() 3248 elif self._match_text_seq("STREAM"): 3249 this = self._parse_function() 3250 if this: 3251 this = self.expression(exp.Stream, this=this) 3252 else: 3253 self._retreat(self._index - 1) 3254 else: 3255 this = None 
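# Whatever was parsed above may still be the left-hand side of a set operation
# (UNION / INTERSECT / EXCEPT), which _parse_set_operations attaches here.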
3256 3257 return self._parse_set_operations(this) if parse_set_operation else this 3258 3259 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3260 self._match_text_seq("SEARCH") 3261 3262 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3263 3264 if not kind: 3265 return None 3266 3267 self._match_text_seq("FIRST", "BY") 3268 3269 return self.expression( 3270 exp.RecursiveWithSearch, 3271 kind=kind, 3272 this=self._parse_id_var(), 3273 expression=self._match_text_seq("SET") and self._parse_id_var(), 3274 using=self._match_text_seq("USING") and self._parse_id_var(), 3275 ) 3276 3277 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3278 if not skip_with_token and not self._match(TokenType.WITH): 3279 return None 3280 3281 comments = self._prev_comments 3282 recursive = self._match(TokenType.RECURSIVE) 3283 3284 last_comments = None 3285 expressions = [] 3286 while True: 3287 cte = self._parse_cte() 3288 if isinstance(cte, exp.CTE): 3289 expressions.append(cte) 3290 if last_comments: 3291 cte.add_comments(last_comments) 3292 3293 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3294 break 3295 else: 3296 self._match(TokenType.WITH) 3297 3298 last_comments = self._prev_comments 3299 3300 return self.expression( 3301 exp.With, 3302 comments=comments, 3303 expressions=expressions, 3304 recursive=recursive, 3305 search=self._parse_recursive_with_search(), 3306 ) 3307 3308 def _parse_cte(self) -> t.Optional[exp.CTE]: 3309 index = self._index 3310 3311 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3312 if not alias or not alias.this: 3313 self.raise_error("Expected CTE to have alias") 3314 3315 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3316 self._retreat(index) 3317 return None 3318 3319 comments = self._prev_comments 3320 3321 if self._match_text_seq("NOT", "MATERIALIZED"): 3322 materialized = False 3323 elif self._match_text_seq("MATERIALIZED"): 3324 materialized = True 3325 else: 3326 materialized = None 3327 3328 cte = self.expression( 3329 exp.CTE, 3330 this=self._parse_wrapped(self._parse_statement), 3331 alias=alias, 3332 materialized=materialized, 3333 comments=comments, 3334 ) 3335 3336 if isinstance(cte.this, exp.Values): 3337 cte.set("this", exp.select("*").from_(exp.alias_(cte.this, "_values", table=True))) 3338 3339 return cte 3340 3341 def _parse_table_alias( 3342 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3343 ) -> t.Optional[exp.TableAlias]: 3344 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3345 # so this section tries to parse the clause version and if it fails, it treats the token 3346 # as an identifier (alias) 3347 if self._can_parse_limit_or_offset(): 3348 return None 3349 3350 any_token = self._match(TokenType.ALIAS) 3351 alias = ( 3352 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3353 or self._parse_string_as_identifier() 3354 ) 3355 3356 index = self._index 3357 if self._match(TokenType.L_PAREN): 3358 columns = self._parse_csv(self._parse_function_parameter) 3359 self._match_r_paren() if columns else self._retreat(index) 3360 else: 3361 columns = None 3362 3363 if not alias and not columns: 3364 return None 3365 3366 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3367 3368 # We bubble up comments from the Identifier to the TableAlias 3369 if isinstance(alias, exp.Identifier): 3370 
    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
            sample=self._parse_table_sample(),
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, self.MODIFIABLES):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]:
        start = self._curr
        while self._curr:
            self._advance()

        end = self._tokens[self._index - 1]
        return exp.Hint(expressions=[self._find_sql(start, end)])

    def _parse_hint_function_call(self) -> t.Optional[exp.Expression]:
        return self._parse_function_call()

    def _parse_hint_body(self) -> t.Optional[exp.Hint]:
        start_index = self._index
        should_fallback_to_string = False

        hints = []
        try:
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_hint_function_call() or self._parse_var(upper=True),
                ),
                [],
            ):
                hints.extend(hint)
        except ParseError:
            should_fallback_to_string = True

        if should_fallback_to_string or self._curr:
            self._retreat(start_index)
            return self._parse_hint_fallback_to_string()

        return self.expression(exp.Hint, expressions=hints)
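    # Illustrative sketch (assuming `sqlglot.parse_one`): on dialects that set
    # SUPPORTS_IMPLICIT_UNNEST (e.g. BigQuery), `_implicit_unnests_to_explicit`
    # rewrites a comma join against a column path into an explicit UNNEST.
    #
    #   import sqlglot
    #   ast = sqlglot.parse_one("SELECT col FROM t, t.arr AS col", read="bigquery")
    #   assert ast.find(sqlglot.exp.Unnest) is not None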
    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT) and self._prev_comments:
            return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )
    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        ordinality: t.Optional[bool] = None

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
            ordinality=ordinality,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_using_identifiers(self) -> t.List[exp.Expression]:
        def _parse_column_as_identifier() -> t.Optional[exp.Expression]:
            this = self._parse_column()
            if isinstance(this, exp.Column):
                return this.this
            return this

        return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            table = self._try_parse(self._parse_table)
            if table:
                return self.expression(exp.Join, this=table)
            return None

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}
        if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA):
            kwargs["expressions"] = self._parse_csv(
                lambda: self._parse_table(parse_bracket=parse_bracket)
            )

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        elif (
            not (outer_apply or cross_apply)
            and not isinstance(kwargs["this"], exp.Unnest)
            and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY))
        ):
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_assignment()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_using_identifiers()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        kwargs["pivots"] = self._parse_pivots()

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)
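    # Illustrative sketch (assuming `sqlglot.parse_one`): the method/side/kind
    # tokens parsed above land on the Join node as plain strings.
    #
    #   import sqlglot
    #   sql = "SELECT * FROM a LEFT JOIN b ON a.id = b.id"
    #   join = sqlglot.parse_one(sql).args["joins"][0]
    #   assert join.side == "LEFT" and isinstance(join.args["on"], sqlglot.exp.EQ)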
    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        this = self._parse_assignment()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        on = self._parse_field() if self._match(TokenType.ON) else None

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
            on=on,
        )

    def _parse_index(
        self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
    ) -> t.Optional[exp.Index]:
        if index or anonymous:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )
    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_set((TokenType.INDEX, TokenType.KEY))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        table = self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
        )

        changes = self._parse_changes()
        if changes:
            table.set("changes", changes)

        at_before = self._parse_historical_data()
        if at_before:
            table.set("when", at_before)

        pivots = self._parse_pivots()
        if pivots:
            table.set("pivots", pivots)

        return table
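    # Illustrative sketch (assuming `sqlglot.parse_one`): each DOT shifts the
    # previously parsed parts into the db/catalog positions.
    #
    #   import sqlglot
    #   tbl = sqlglot.parse_one("SELECT * FROM cat.db.tbl").find(sqlglot.exp.Table)
    #   assert (tbl.catalog, tbl.db, tbl.name) == ("cat", "db", "tbl")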
    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv(
            self._parse_table
        )
        rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or rows_from
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)
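    # Illustrative sketch (assuming `sqlglot.parse_one`): `_parse_version` backs
    # time-travel clauses such as BigQuery's `FOR SYSTEM_TIME AS OF`.
    #
    #   import sqlglot
    #   sql = "SELECT * FROM t FOR SYSTEM_TIME AS OF '2024-01-01'"
    #   version = sqlglot.parse_one(sql, read="bigquery").find(sqlglot.exp.Version)
    #   assert version.args["kind"] == "AS OF"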
    def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]:
        # https://docs.snowflake.com/en/sql-reference/constructs/at-before
        index = self._index
        historical_data = None
        if self._match_texts(self.HISTORICAL_DATA_PREFIX):
            this = self._prev.text.upper()
            kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.HISTORICAL_DATA_KIND)
                and self._prev.text.upper()
            )
            expression = self._match(TokenType.FARROW) and self._parse_bitwise()

            if expression:
                self._match_r_paren()
                historical_data = self.expression(
                    exp.HistoricalData, this=this, kind=kind, expression=expression
                )
            else:
                self._retreat(index)

        return historical_data

    def _parse_changes(self) -> t.Optional[exp.Changes]:
        if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"):
            return None

        information = self._parse_var(any_token=True)
        self._match_r_paren()

        return self.expression(
            exp.Changes,
            information=information,
            at_before=self._parse_historical_data(),
            end=self._parse_historical_data(),
        )

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not (
            # ClickHouse's `FORMAT Values` is equivalent to `VALUES`
            self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES")
        ):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        if not method and self.DEFAULT_SAMPLING_METHOD:
            method = exp.var(self.DEFAULT_SAMPLING_METHOD)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )
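    # Illustrative sketch (assuming `sqlglot.parse_one`): BigQuery's
    # `UNNEST ... WITH OFFSET` ends up in the Unnest node's "offset" arg.
    #
    #   import sqlglot
    #   sql = "SELECT x FROM UNNEST([1, 2]) AS x WITH OFFSET AS pos"
    #   unnest = sqlglot.parse_one(sql, read="bigquery").find(sqlglot.exp.Unnest)
    #   assert unnest.args["offset"].name == "pos"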
    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        return iter(self._parse_join, None)

    def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]:
        if not self._match(TokenType.INTO):
            return None

        return self.expression(
            exp.UnpivotColumns,
            this=self._match_text_seq("NAME") and self._parse_column(),
            expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column),
        )

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match(TokenType.IN):
                # PIVOT ... ON col IN (row_val1, row_val2)
                return self._parse_in(this)
            if self._match(TokenType.ALIAS, advance=False):
                # UNPIVOT ... ON (col1, col2, col3) AS row_val
                return self._parse_alias(this)

            return this
        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        into = self._parse_unpivot_columns()
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()

        return self.expression(
            exp.Pivot,
            this=this,
            expressions=expressions,
            using=using,
            group=group,
            unpivot=is_unpivot,
            into=into,
        )

    def _parse_pivot_in(self) -> exp.In:
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_select_or_expression()

            self._match(TokenType.ALIAS)
            alias = self._parse_bitwise()
            if alias:
                if isinstance(alias, exp.Column) and not alias.db:
                    alias = alias.this
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        if self._match(TokenType.ANY):
            exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order()))
        else:
            exprs = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=exprs)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        fields = []
        while True:
            field = self._try_parse(self._parse_pivot_in)
            if not field:
                break
            fields.append(field)

        default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
            self._parse_bitwise
        )

        group = self._parse_group()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            fields=fields,
            unpivot=unpivot,
            include_nulls=include_nulls,
            default_on_null=default_on_null,
            group=group,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            all_fields = []
            for pivot_field in pivot.fields:
                pivot_field_expressions = pivot_field.expressions

                # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case.
                if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny):
                    continue

                all_fields.append(
                    [
                        fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                        for fld in pivot_field_expressions
                    ]
                )

            if all_fields:
                if names:
                    all_fields.append(names)

                # Generate all possible combinations of the pivot columns
                # e.g. PIVOT(sum(...) AS total FOR year IN (2000, 2010) FOR country IN ('NL', 'US'))
                # generates the product between [[2000, 2010], ['NL', 'US'], ['total']]
                for fld_parts_tuple in itertools.product(*all_fields):
                    fld_parts = list(fld_parts_tuple)

                    if names and self.PREFIXED_PIVOT_COLUMNS:
                        # Move the "name" to the front of the list
                        fld_parts.insert(0, fld_parts.pop(-1))

                    columns.append(exp.to_identifier("_".join(fld_parts)))

            pivot.set("columns", columns)

        return pivot
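    # Runnable standalone sketch of the column-name product above:
    #
    #   import itertools
    #   all_fields = [["2000", "2010"], ["NL", "US"], ["total"]]
    #   names = ["_".join(parts) for parts in itertools.product(*all_fields)]
    #   # names == ["2000_NL_total", "2000_US_total", "2010_NL_total", "2010_US_total"]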
    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations if agg.alias]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        while True:
            index = self._index

            elements["expressions"].extend(
                self._parse_csv(
                    lambda: None
                    if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False)
                    else self._parse_assignment()
                )
            )

            before_with_index = self._index
            with_prefix = self._match(TokenType.WITH)

            if self._match(TokenType.ROLLUP):
                elements["rollup"].append(
                    self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix)
                )
            elif self._match(TokenType.CUBE):
                elements["cube"].append(
                    self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix)
                )
            elif self._match(TokenType.GROUPING_SETS):
                elements["grouping_sets"].append(
                    self.expression(
                        exp.GroupingSets,
                        expressions=self._parse_wrapped_csv(self._parse_grouping_set),
                    )
                )
            elif self._match_text_seq("TOTALS"):
                elements["totals"] = True  # type: ignore

            if before_with_index <= self._index <= before_with_index + 1:
                self._retreat(before_with_index)
                break

            if index == self._index:
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E:
        return self.expression(
            kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column)
        )

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()
    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_assignment())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_assignment())

    def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]:
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_assignment()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")
        return connect

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_assignment()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        connect = self._parse_connect_with_prior()

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_assignment()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> t.Optional[exp.Expression]:
        this = self._parse_id_var(any_token=True)
        if self._match(TokenType.ALIAS):
            this = self.expression(exp.Alias, alias=this, this=self._parse_assignment())
        return this

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        this = parse_method() if parse_method else self._parse_assignment()
        if not this:
            return None

        if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL:
            this = exp.var("ALL")

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                    "interpolate": self._parse_interpolate(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )
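    # Illustrative sketch (assuming `sqlglot.parse_one`): with the default
    # "nulls_are_small" ordering, an ascending key is marked nulls-first even
    # though the input SQL never said so.
    #
    #   import sqlglot
    #   ordered = sqlglot.parse_one("SELECT x FROM t ORDER BY x").find(sqlglot.exp.Ordered)
    #   assert ordered.args.get("nulls_first") is True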
    def _parse_limit_options(self) -> exp.LimitOptions:
        percent = self._match(TokenType.PERCENT)
        rows = self._match_set((TokenType.ROW, TokenType.ROWS))
        self._match_text_seq("ONLY")
        with_ties = self._match_text_seq("WITH", "TIES")
        return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties)

    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()

                limit_options = self._parse_limit_options()
            else:
                limit_options = None
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                limit_options=limit_options,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                limit_options=self._parse_limit_options(),
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _can_parse_limit_or_offset(self) -> bool:
        if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False):
            return False

        index = self._index
        result = bool(
            self._try_parse(self._parse_limit, retreat=True)
            or self._try_parse(self._parse_offset, retreat=True)
        )
        self._retreat(index)
        return result

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks
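    # Illustrative sketch (assuming `sqlglot.transpile`): the COMMA branch in
    # `_parse_limit` handles MySQL's `LIMIT <offset>, <count>` form, which can
    # then be re-emitted with an explicit OFFSET.
    #
    #   import sqlglot
    #   out = sqlglot.transpile("SELECT x FROM t LIMIT 2, 10", read="mysql", write="postgres")[0]
    #   # expected: "SELECT x FROM t LIMIT 10 OFFSET 2"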
    def parse_set_operation(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        start = self._index
        _, side_token, kind_token = self._parse_join_parts()

        side = side_token.text if side_token else None
        kind = kind_token.text if kind_token else None

        if not self._match_set(self.SET_OPERATIONS):
            self._retreat(start)
            return None

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            operation: t.Type[exp.SetOperation] = exp.Union
        elif token_type == TokenType.EXCEPT:
            operation = exp.Except
        else:
            operation = exp.Intersect

        comments = self._prev.comments

        if self._match(TokenType.DISTINCT):
            distinct: t.Optional[bool] = True
        elif self._match(TokenType.ALL):
            distinct = False
        else:
            distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
            if distinct is None:
                self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

        by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq(
            "STRICT", "CORRESPONDING"
        )
        if self._match_text_seq("CORRESPONDING"):
            by_name = True
            if not side and not kind:
                kind = "INNER"

        on_column_list = None
        if by_name and self._match_texts(("ON", "BY")):
            on_column_list = self._parse_wrapped_csv(self._parse_column)

        expression = self._parse_select(nested=True, parse_set_operation=False)

        return self.expression(
            operation,
            comments=comments,
            this=this,
            distinct=distinct,
            by_name=by_name,
            expression=expression,
            side=side,
            kind=kind,
            on=on_column_list,
        )

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        while this:
            setop = self.parse_set_operation(this)
            if not setop:
                break
            this = setop

        if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP:
            expression = this.expression

            if expression:
                for arg in self.SET_OP_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_assignment())

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        this = self._parse_disjunction()
        if not this and self._next and self._next.token_type in self.ASSIGNMENT:
            # This allows us to parse <non-identifier token> := <expr>
            this = exp.column(
                t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text)
            )

        while self._match_set(self.ASSIGNMENT):
            if isinstance(this, exp.Column) and len(this.parts) == 1:
                this = this.this

            this = self.expression(
                self.ASSIGNMENT[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_assignment(),
            )

        return this
    def _parse_disjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION)

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self._negate_range(this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not this:
            return this

        return self.expression(exp.Not, this=this)

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        if self._match(TokenType.JSON):
            kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper()

            if self._match_text_seq("WITH"):
                _with = True
            elif self._match_text_seq("WITHOUT"):
                _with = False
            else:
                _with = None

            unique = self._match(TokenType.UNIQUE)
            self._match_text_seq("KEYS")
            expression: t.Optional[exp.Expression] = self.expression(
                exp.JSON, **{"this": kind, "with": _with, "unique": unique}
            )
        else:
            expression = self._parse_primary() or self._parse_null()
            if not expression:
                self._retreat(index)
                return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_column())

        return this
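    # Illustrative sketch (assuming `sqlglot.parse_one`): `IS NOT DISTINCT FROM`
    # is the null-safe equality, hence the NullSafeEQ branch above.
    #
    #   import sqlglot
    #   ast = sqlglot.parse_one("SELECT a IS NOT DISTINCT FROM b")
    #   assert ast.find(sqlglot.exp.NullSafeEQ) is not None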
    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.to_py())
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if parts and unit:
                # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                unit = None
                self._retreat(self._index - 1)

            if len(parts) == 1:
                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var, this=parts[0][1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        interval = self.expression(exp.Interval, this=this, unit=unit)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval
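    # Illustrative sketch (assuming `sqlglot.parse_one`): the canonical
    # INTERVAL form described above makes transpilation uniform.
    #
    #   import sqlglot
    #   assert sqlglot.parse_one("SELECT INTERVAL '5 day'").sql() == "SELECT INTERVAL '5' DAY"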
    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(
                    exp.Coalesce, this=this, expressions=ensure_list(self._parse_term())
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        this = self._parse_factor()

        while self._match_set(self.TERM):
            klass = self.TERM[self._prev.token_type]
            comments = self._prev_comments
            expression = self._parse_factor()

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Collate):
                expr = this.expression

                # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise
                # fall back to Identifier / Var
                if isinstance(expr, exp.Column) and len(expr.parts) == 1:
                    ident = expr.this
                    if isinstance(ident, exp.Identifier):
                        this.set("expression", ident if ident.quoted else exp.var(ident.name))

        return this

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Div):
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                this = self._parse_column_ops(this)

                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

            self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier, exp.Identifier):
                tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect))

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    this = exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type
5142 if type_token == TokenType.PSEUDO_TYPE: 5143 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5144 5145 if type_token == TokenType.OBJECT_IDENTIFIER: 5146 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5147 5148 # https://materialize.com/docs/sql/types/map/ 5149 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5150 key_type = self._parse_types( 5151 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5152 ) 5153 if not self._match(TokenType.FARROW): 5154 self._retreat(index) 5155 return None 5156 5157 value_type = self._parse_types( 5158 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5159 ) 5160 if not self._match(TokenType.R_BRACKET): 5161 self._retreat(index) 5162 return None 5163 5164 return exp.DataType( 5165 this=exp.DataType.Type.MAP, 5166 expressions=[key_type, value_type], 5167 nested=True, 5168 prefix=prefix, 5169 ) 5170 5171 nested = type_token in self.NESTED_TYPE_TOKENS 5172 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5173 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5174 expressions = None 5175 maybe_func = False 5176 5177 if self._match(TokenType.L_PAREN): 5178 if is_struct: 5179 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5180 elif nested: 5181 expressions = self._parse_csv( 5182 lambda: self._parse_types( 5183 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5184 ) 5185 ) 5186 if type_token == TokenType.NULLABLE and len(expressions) == 1: 5187 this = expressions[0] 5188 this.set("nullable", True) 5189 self._match_r_paren() 5190 return this 5191 elif type_token in self.ENUM_TYPE_TOKENS: 5192 expressions = self._parse_csv(self._parse_equality) 5193 elif is_aggregate: 5194 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5195 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5196 ) 5197 if not func_or_ident: 5198 return None 5199 expressions = [func_or_ident] 5200 if self._match(TokenType.COMMA): 5201 expressions.extend( 5202 self._parse_csv( 5203 lambda: self._parse_types( 5204 check_func=check_func, 5205 schema=schema, 5206 allow_identifiers=allow_identifiers, 5207 ) 5208 ) 5209 ) 5210 else: 5211 expressions = self._parse_csv(self._parse_type_size) 5212 5213 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5214 if type_token == TokenType.VECTOR and len(expressions) == 2: 5215 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 5216 5217 if not expressions or not self._match(TokenType.R_PAREN): 5218 self._retreat(index) 5219 return None 5220 5221 maybe_func = True 5222 5223 values: t.Optional[t.List[exp.Expression]] = None 5224 5225 if nested and self._match(TokenType.LT): 5226 if is_struct: 5227 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5228 else: 5229 expressions = self._parse_csv( 5230 lambda: self._parse_types( 5231 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5232 ) 5233 ) 5234 5235 if not self._match(TokenType.GT): 5236 self.raise_error("Expecting >") 5237 5238 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5239 values = self._parse_csv(self._parse_assignment) 5240 if not values and is_struct: 5241 values = None 5242 self._retreat(self._index - 1) 5243 else: 5244 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5245 5246 if type_token in self.TIMESTAMPS: 5247 if self._match_text_seq("WITH", "TIME", 
"ZONE"): 5248 maybe_func = False 5249 tz_type = ( 5250 exp.DataType.Type.TIMETZ 5251 if type_token in self.TIMES 5252 else exp.DataType.Type.TIMESTAMPTZ 5253 ) 5254 this = exp.DataType(this=tz_type, expressions=expressions) 5255 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5256 maybe_func = False 5257 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5258 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5259 maybe_func = False 5260 elif type_token == TokenType.INTERVAL: 5261 unit = self._parse_var(upper=True) 5262 if unit: 5263 if self._match_text_seq("TO"): 5264 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5265 5266 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5267 else: 5268 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5269 elif type_token == TokenType.VOID: 5270 this = exp.DataType(this=exp.DataType.Type.NULL) 5271 5272 if maybe_func and check_func: 5273 index2 = self._index 5274 peek = self._parse_string() 5275 5276 if not peek: 5277 self._retreat(index) 5278 return None 5279 5280 self._retreat(index2) 5281 5282 if not this: 5283 if self._match_text_seq("UNSIGNED"): 5284 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5285 if not unsigned_type_token: 5286 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5287 5288 type_token = unsigned_type_token or type_token 5289 5290 this = exp.DataType( 5291 this=exp.DataType.Type[type_token.value], 5292 expressions=expressions, 5293 nested=nested, 5294 prefix=prefix, 5295 ) 5296 5297 # Empty arrays/structs are allowed 5298 if values is not None: 5299 cls = exp.Struct if is_struct else exp.Array 5300 this = exp.cast(cls(expressions=values), this, copy=False) 5301 5302 elif expressions: 5303 this.set("expressions", expressions) 5304 5305 # https://materialize.com/docs/sql/types/list/#type-name 5306 while self._match(TokenType.LIST): 5307 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5308 5309 index = self._index 5310 5311 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5312 matched_array = self._match(TokenType.ARRAY) 5313 5314 while self._curr: 5315 datatype_token = self._prev.token_type 5316 matched_l_bracket = self._match(TokenType.L_BRACKET) 5317 5318 if (not matched_l_bracket and not matched_array) or ( 5319 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5320 ): 5321 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5322 # not to be confused with the fixed size array parsing 5323 break 5324 5325 matched_array = False 5326 values = self._parse_csv(self._parse_assignment) or None 5327 if ( 5328 values 5329 and not schema 5330 and ( 5331 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5332 ) 5333 ): 5334 # Retreating here means that we should not parse the following values as part of the data type, e.g. 
in DuckDB 5335 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5336 self._retreat(index) 5337 break 5338 5339 this = exp.DataType( 5340 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5341 ) 5342 self._match(TokenType.R_BRACKET) 5343 5344 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5345 converter = self.TYPE_CONVERTERS.get(this.this) 5346 if converter: 5347 this = converter(t.cast(exp.DataType, this)) 5348 5349 return this 5350 5351 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5352 index = self._index 5353 5354 if ( 5355 self._curr 5356 and self._next 5357 and self._curr.token_type in self.TYPE_TOKENS 5358 and self._next.token_type in self.TYPE_TOKENS 5359 ): 5360 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5361 # type token. Without this, the list will be parsed as a type and we'll eventually crash 5362 this = self._parse_id_var() 5363 else: 5364 this = ( 5365 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5366 or self._parse_id_var() 5367 ) 5368 5369 self._match(TokenType.COLON) 5370 5371 if ( 5372 type_required 5373 and not isinstance(this, exp.DataType) 5374 and not self._match_set(self.TYPE_TOKENS, advance=False) 5375 ): 5376 self._retreat(index) 5377 return self._parse_types() 5378 5379 return self._parse_column_def(this) 5380 5381 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5382 if not self._match_text_seq("AT", "TIME", "ZONE"): 5383 return this 5384 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5385 5386 def _parse_column(self) -> t.Optional[exp.Expression]: 5387 this = self._parse_column_reference() 5388 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5389 5390 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5391 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5392 5393 return column 5394 5395 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5396 this = self._parse_field() 5397 if ( 5398 not this 5399 and self._match(TokenType.VALUES, advance=False) 5400 and self.VALUES_FOLLOWED_BY_PAREN 5401 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5402 ): 5403 this = self._parse_id_var() 5404 5405 if isinstance(this, exp.Identifier): 5406 # We bubble up comments from the Identifier to the Column 5407 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5408 5409 return this 5410 5411 def _parse_colon_as_variant_extract( 5412 self, this: t.Optional[exp.Expression] 5413 ) -> t.Optional[exp.Expression]: 5414 casts = [] 5415 json_path = [] 5416 escape = None 5417 5418 while self._match(TokenType.COLON): 5419 start_index = self._index 5420 5421 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5422 path = self._parse_column_ops( 5423 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5424 ) 5425 5426 # The cast :: operator has a lower precedence than the extraction operator :, so 5427 # we rearrange the AST appropriately to avoid casting the JSON path 5428 while isinstance(path, exp.Cast): 5429 casts.append(path.to) 5430 path = path.this 5431 5432 if casts: 5433 dcolon_offset = next( 5434 i 5435 for i, t in enumerate(self._tokens[start_index:]) 5436 if t.token_type == TokenType.DCOLON 
5437 ) 5438 end_token = self._tokens[start_index + dcolon_offset - 1] 5439 else: 5440 end_token = self._prev 5441 5442 if path: 5443 # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as 5444 # it'll roundtrip to a string literal in GET_PATH 5445 if isinstance(path, exp.Identifier) and path.quoted: 5446 escape = True 5447 5448 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5449 5450 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5451 # Databricks transforms it back to the colon/dot notation 5452 if json_path: 5453 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5454 5455 if json_path_expr: 5456 json_path_expr.set("escape", escape) 5457 5458 this = self.expression( 5459 exp.JSONExtract, 5460 this=this, 5461 expression=json_path_expr, 5462 variant_extract=True, 5463 ) 5464 5465 while casts: 5466 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5467 5468 return this 5469 5470 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5471 return self._parse_types() 5472 5473 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5474 this = self._parse_bracket(this) 5475 5476 while self._match_set(self.COLUMN_OPERATORS): 5477 op_token = self._prev.token_type 5478 op = self.COLUMN_OPERATORS.get(op_token) 5479 5480 if op_token in (TokenType.DCOLON, TokenType.DOTCOLON): 5481 field = self._parse_dcolon() 5482 if not field: 5483 self.raise_error("Expected type") 5484 elif op and self._curr: 5485 field = self._parse_column_reference() or self._parse_bracket() 5486 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5487 field = self._parse_column_ops(field) 5488 else: 5489 field = self._parse_field(any_token=True, anonymous_func=True) 5490 5491 if isinstance(field, (exp.Func, exp.Window)) and this: 5492 # BQ & snowflake allow function calls like x.y.count(...), SAFE.SUBSTR(...) 
etc 5493 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5494 this = exp.replace_tree( 5495 this, 5496 lambda n: ( 5497 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5498 if n.table 5499 else n.this 5500 ) 5501 if isinstance(n, exp.Column) 5502 else n, 5503 ) 5504 5505 if op: 5506 this = op(self, this, field) 5507 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5508 this = self.expression( 5509 exp.Column, 5510 comments=this.comments, 5511 this=field, 5512 table=this.this, 5513 db=this.args.get("table"), 5514 catalog=this.args.get("db"), 5515 ) 5516 elif isinstance(field, exp.Window): 5517 # Move the exp.Dot's to the window's function 5518 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5519 field.set("this", window_func) 5520 this = field 5521 else: 5522 this = self.expression(exp.Dot, this=this, expression=field) 5523 5524 if field and field.comments: 5525 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5526 5527 this = self._parse_bracket(this) 5528 5529 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5530 5531 def _parse_primary(self) -> t.Optional[exp.Expression]: 5532 if self._match_set(self.PRIMARY_PARSERS): 5533 token_type = self._prev.token_type 5534 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5535 5536 if token_type == TokenType.STRING: 5537 expressions = [primary] 5538 while self._match(TokenType.STRING): 5539 expressions.append(exp.Literal.string(self._prev.text)) 5540 5541 if len(expressions) > 1: 5542 return self.expression(exp.Concat, expressions=expressions) 5543 5544 return primary 5545 5546 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5547 return exp.Literal.number(f"0.{self._prev.text}") 5548 5549 if self._match(TokenType.L_PAREN): 5550 comments = self._prev_comments 5551 query = self._parse_select() 5552 5553 if query: 5554 expressions = [query] 5555 else: 5556 expressions = self._parse_expressions() 5557 5558 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5559 5560 if not this and self._match(TokenType.R_PAREN, advance=False): 5561 this = self.expression(exp.Tuple) 5562 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5563 this = self._parse_subquery(this=this, parse_alias=False) 5564 elif isinstance(this, exp.Subquery): 5565 this = self._parse_subquery( 5566 this=self._parse_set_operations(this), parse_alias=False 5567 ) 5568 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5569 this = self.expression(exp.Tuple, expressions=expressions) 5570 else: 5571 this = self.expression(exp.Paren, this=this) 5572 5573 if this: 5574 this.add_comments(comments) 5575 5576 self._match_r_paren(expression=this) 5577 return this 5578 5579 return None 5580 5581 def _parse_field( 5582 self, 5583 any_token: bool = False, 5584 tokens: t.Optional[t.Collection[TokenType]] = None, 5585 anonymous_func: bool = False, 5586 ) -> t.Optional[exp.Expression]: 5587 if anonymous_func: 5588 field = ( 5589 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5590 or self._parse_primary() 5591 ) 5592 else: 5593 field = self._parse_primary() or self._parse_function( 5594 anonymous=anonymous_func, any_token=any_token 5595 ) 5596 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5597 5598 def _parse_function( 5599 self, 5600 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5601 anonymous: bool = False, 5602 optional_parens: 
bool = True, 5603 any_token: bool = False, 5604 ) -> t.Optional[exp.Expression]: 5605 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5606 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5607 fn_syntax = False 5608 if ( 5609 self._match(TokenType.L_BRACE, advance=False) 5610 and self._next 5611 and self._next.text.upper() == "FN" 5612 ): 5613 self._advance(2) 5614 fn_syntax = True 5615 5616 func = self._parse_function_call( 5617 functions=functions, 5618 anonymous=anonymous, 5619 optional_parens=optional_parens, 5620 any_token=any_token, 5621 ) 5622 5623 if fn_syntax: 5624 self._match(TokenType.R_BRACE) 5625 5626 return func 5627 5628 def _parse_function_call( 5629 self, 5630 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5631 anonymous: bool = False, 5632 optional_parens: bool = True, 5633 any_token: bool = False, 5634 ) -> t.Optional[exp.Expression]: 5635 if not self._curr: 5636 return None 5637 5638 comments = self._curr.comments 5639 token = self._curr 5640 token_type = self._curr.token_type 5641 this = self._curr.text 5642 upper = this.upper() 5643 5644 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5645 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5646 self._advance() 5647 return self._parse_window(parser(self)) 5648 5649 if not self._next or self._next.token_type != TokenType.L_PAREN: 5650 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5651 self._advance() 5652 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5653 5654 return None 5655 5656 if any_token: 5657 if token_type in self.RESERVED_TOKENS: 5658 return None 5659 elif token_type not in self.FUNC_TOKENS: 5660 return None 5661 5662 self._advance(2) 5663 5664 parser = self.FUNCTION_PARSERS.get(upper) 5665 if parser and not anonymous: 5666 this = parser(self) 5667 else: 5668 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5669 5670 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5671 this = self.expression( 5672 subquery_predicate, comments=comments, this=self._parse_select() 5673 ) 5674 self._match_r_paren() 5675 return this 5676 5677 if functions is None: 5678 functions = self.FUNCTIONS 5679 5680 function = functions.get(upper) 5681 known_function = function and not anonymous 5682 5683 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5684 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5685 5686 post_func_comments = self._curr and self._curr.comments 5687 if known_function and post_func_comments: 5688 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5689 # call we'll construct it as exp.Anonymous, even if it's "known" 5690 if any( 5691 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5692 for comment in post_func_comments 5693 ): 5694 known_function = False 5695 5696 if alias and known_function: 5697 args = self._kv_to_prop_eq(args) 5698 5699 if known_function: 5700 func_builder = t.cast(t.Callable, function) 5701 5702 if "dialect" in func_builder.__code__.co_varnames: 5703 func = func_builder(args, dialect=self.dialect) 5704 else: 5705 func = func_builder(args) 5706 5707 func = self.validate_expression(func, args) 5708 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5709 func.meta["name"] = this 5710 5711 this = func 5712 else: 5713 if token_type == TokenType.IDENTIFIER: 5714 this = exp.Identifier(this=this, quoted=True).update_positions(token) 5715 5716 this = 
self.expression(exp.Anonymous, this=this, expressions=args) 5717 this = this.update_positions(token) 5718 5719 if isinstance(this, exp.Expression): 5720 this.add_comments(comments) 5721 5722 self._match_r_paren(this) 5723 return self._parse_window(this) 5724 5725 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5726 return expression 5727 5728 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5729 transformed = [] 5730 5731 for index, e in enumerate(expressions): 5732 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5733 if isinstance(e, exp.Alias): 5734 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5735 5736 if not isinstance(e, exp.PropertyEQ): 5737 e = self.expression( 5738 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5739 ) 5740 5741 if isinstance(e.this, exp.Column): 5742 e.this.replace(e.this.this) 5743 else: 5744 e = self._to_prop_eq(e, index) 5745 5746 transformed.append(e) 5747 5748 return transformed 5749 5750 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5751 return self._parse_statement() 5752 5753 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5754 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5755 5756 def _parse_user_defined_function( 5757 self, kind: t.Optional[TokenType] = None 5758 ) -> t.Optional[exp.Expression]: 5759 this = self._parse_table_parts(schema=True) 5760 5761 if not self._match(TokenType.L_PAREN): 5762 return this 5763 5764 expressions = self._parse_csv(self._parse_function_parameter) 5765 self._match_r_paren() 5766 return self.expression( 5767 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5768 ) 5769 5770 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5771 literal = self._parse_primary() 5772 if literal: 5773 return self.expression(exp.Introducer, this=token.text, expression=literal) 5774 5775 return self._identifier_expression(token) 5776 5777 def _parse_session_parameter(self) -> exp.SessionParameter: 5778 kind = None 5779 this = self._parse_id_var() or self._parse_primary() 5780 5781 if this and self._match(TokenType.DOT): 5782 kind = this.name 5783 this = self._parse_var() or self._parse_primary() 5784 5785 return self.expression(exp.SessionParameter, this=this, kind=kind) 5786 5787 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5788 return self._parse_id_var() 5789 5790 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5791 index = self._index 5792 5793 if self._match(TokenType.L_PAREN): 5794 expressions = t.cast( 5795 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5796 ) 5797 5798 if not self._match(TokenType.R_PAREN): 5799 self._retreat(index) 5800 else: 5801 expressions = [self._parse_lambda_arg()] 5802 5803 if self._match_set(self.LAMBDAS): 5804 return self.LAMBDAS[self._prev.token_type](self, expressions) 5805 5806 self._retreat(index) 5807 5808 this: t.Optional[exp.Expression] 5809 5810 if self._match(TokenType.DISTINCT): 5811 this = self.expression( 5812 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5813 ) 5814 else: 5815 this = self._parse_select_or_expression(alias=alias) 5816 5817 return self._parse_limit( 5818 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5819 ) 5820 5821 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> 
t.Optional[exp.Expression]: 5822 index = self._index 5823 if not self._match(TokenType.L_PAREN): 5824 return this 5825 5826 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 5827 # expr can be of both types 5828 if self._match_set(self.SELECT_START_TOKENS): 5829 self._retreat(index) 5830 return this 5831 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5832 self._match_r_paren() 5833 return self.expression(exp.Schema, this=this, expressions=args) 5834 5835 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5836 return self._parse_column_def(self._parse_field(any_token=True)) 5837 5838 def _parse_column_def( 5839 self, this: t.Optional[exp.Expression], computed_column: bool = True 5840 ) -> t.Optional[exp.Expression]: 5841 # column defs are not really columns, they're identifiers 5842 if isinstance(this, exp.Column): 5843 this = this.this 5844 5845 if not computed_column: 5846 self._match(TokenType.ALIAS) 5847 5848 kind = self._parse_types(schema=True) 5849 5850 if self._match_text_seq("FOR", "ORDINALITY"): 5851 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5852 5853 constraints: t.List[exp.Expression] = [] 5854 5855 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5856 ("ALIAS", "MATERIALIZED") 5857 ): 5858 persisted = self._prev.text.upper() == "MATERIALIZED" 5859 constraint_kind = exp.ComputedColumnConstraint( 5860 this=self._parse_assignment(), 5861 persisted=persisted or self._match_text_seq("PERSISTED"), 5862 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5863 ) 5864 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5865 elif ( 5866 kind 5867 and self._match(TokenType.ALIAS, advance=False) 5868 and ( 5869 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 5870 or (self._next and self._next.token_type == TokenType.L_PAREN) 5871 ) 5872 ): 5873 self._advance() 5874 constraints.append( 5875 self.expression( 5876 exp.ColumnConstraint, 5877 kind=exp.TransformColumnConstraint(this=self._parse_disjunction()), 5878 ) 5879 ) 5880 5881 while True: 5882 constraint = self._parse_column_constraint() 5883 if not constraint: 5884 break 5885 constraints.append(constraint) 5886 5887 if not kind and not constraints: 5888 return this 5889 5890 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5891 5892 def _parse_auto_increment( 5893 self, 5894 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5895 start = None 5896 increment = None 5897 5898 if self._match(TokenType.L_PAREN, advance=False): 5899 args = self._parse_wrapped_csv(self._parse_bitwise) 5900 start = seq_get(args, 0) 5901 increment = seq_get(args, 1) 5902 elif self._match_text_seq("START"): 5903 start = self._parse_bitwise() 5904 self._match_text_seq("INCREMENT") 5905 increment = self._parse_bitwise() 5906 5907 if start and increment: 5908 return exp.GeneratedAsIdentityColumnConstraint( 5909 start=start, increment=increment, this=False 5910 ) 5911 5912 return exp.AutoIncrementColumnConstraint() 5913 5914 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5915 if not self._match_text_seq("REFRESH"): 5916 self._retreat(self._index - 1) 5917 return None 5918 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5919 5920 def _parse_compress(self) -> exp.CompressColumnConstraint: 5921 if self._match(TokenType.L_PAREN, advance=False): 5922 return self.expression( 5923 
exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5924 ) 5925 5926 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5927 5928 def _parse_generated_as_identity( 5929 self, 5930 ) -> ( 5931 exp.GeneratedAsIdentityColumnConstraint 5932 | exp.ComputedColumnConstraint 5933 | exp.GeneratedAsRowColumnConstraint 5934 ): 5935 if self._match_text_seq("BY", "DEFAULT"): 5936 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5937 this = self.expression( 5938 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5939 ) 5940 else: 5941 self._match_text_seq("ALWAYS") 5942 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5943 5944 self._match(TokenType.ALIAS) 5945 5946 if self._match_text_seq("ROW"): 5947 start = self._match_text_seq("START") 5948 if not start: 5949 self._match(TokenType.END) 5950 hidden = self._match_text_seq("HIDDEN") 5951 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5952 5953 identity = self._match_text_seq("IDENTITY") 5954 5955 if self._match(TokenType.L_PAREN): 5956 if self._match(TokenType.START_WITH): 5957 this.set("start", self._parse_bitwise()) 5958 if self._match_text_seq("INCREMENT", "BY"): 5959 this.set("increment", self._parse_bitwise()) 5960 if self._match_text_seq("MINVALUE"): 5961 this.set("minvalue", self._parse_bitwise()) 5962 if self._match_text_seq("MAXVALUE"): 5963 this.set("maxvalue", self._parse_bitwise()) 5964 5965 if self._match_text_seq("CYCLE"): 5966 this.set("cycle", True) 5967 elif self._match_text_seq("NO", "CYCLE"): 5968 this.set("cycle", False) 5969 5970 if not identity: 5971 this.set("expression", self._parse_range()) 5972 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5973 args = self._parse_csv(self._parse_bitwise) 5974 this.set("start", seq_get(args, 0)) 5975 this.set("increment", seq_get(args, 1)) 5976 5977 self._match_r_paren() 5978 5979 return this 5980 5981 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5982 self._match_text_seq("LENGTH") 5983 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5984 5985 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5986 if self._match_text_seq("NULL"): 5987 return self.expression(exp.NotNullColumnConstraint) 5988 if self._match_text_seq("CASESPECIFIC"): 5989 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5990 if self._match_text_seq("FOR", "REPLICATION"): 5991 return self.expression(exp.NotForReplicationColumnConstraint) 5992 5993 # Unconsume the `NOT` token 5994 self._retreat(self._index - 1) 5995 return None 5996 5997 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5998 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 5999 6000 procedure_option_follows = ( 6001 self._match(TokenType.WITH, advance=False) 6002 and self._next 6003 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6004 ) 6005 6006 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6007 return self.expression( 6008 exp.ColumnConstraint, 6009 this=this, 6010 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 6011 ) 6012 6013 return this 6014 6015 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6016 if not self._match(TokenType.CONSTRAINT): 6017 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6018 6019 return self.expression( 6020 exp.Constraint, 6021 
this=self._parse_id_var(), 6022 expressions=self._parse_unnamed_constraints(), 6023 ) 6024 6025 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6026 constraints = [] 6027 while True: 6028 constraint = self._parse_unnamed_constraint() or self._parse_function() 6029 if not constraint: 6030 break 6031 constraints.append(constraint) 6032 6033 return constraints 6034 6035 def _parse_unnamed_constraint( 6036 self, constraints: t.Optional[t.Collection[str]] = None 6037 ) -> t.Optional[exp.Expression]: 6038 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6039 constraints or self.CONSTRAINT_PARSERS 6040 ): 6041 return None 6042 6043 constraint = self._prev.text.upper() 6044 if constraint not in self.CONSTRAINT_PARSERS: 6045 self.raise_error(f"No parser found for schema constraint {constraint}.") 6046 6047 return self.CONSTRAINT_PARSERS[constraint](self) 6048 6049 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6050 return self._parse_id_var(any_token=False) 6051 6052 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6053 self._match_text_seq("KEY") 6054 return self.expression( 6055 exp.UniqueColumnConstraint, 6056 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6057 this=self._parse_schema(self._parse_unique_key()), 6058 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6059 on_conflict=self._parse_on_conflict(), 6060 options=self._parse_key_constraint_options(), 6061 ) 6062 6063 def _parse_key_constraint_options(self) -> t.List[str]: 6064 options = [] 6065 while True: 6066 if not self._curr: 6067 break 6068 6069 if self._match(TokenType.ON): 6070 action = None 6071 on = self._advance_any() and self._prev.text 6072 6073 if self._match_text_seq("NO", "ACTION"): 6074 action = "NO ACTION" 6075 elif self._match_text_seq("CASCADE"): 6076 action = "CASCADE" 6077 elif self._match_text_seq("RESTRICT"): 6078 action = "RESTRICT" 6079 elif self._match_pair(TokenType.SET, TokenType.NULL): 6080 action = "SET NULL" 6081 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6082 action = "SET DEFAULT" 6083 else: 6084 self.raise_error("Invalid key constraint") 6085 6086 options.append(f"ON {on} {action}") 6087 else: 6088 var = self._parse_var_from_options( 6089 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6090 ) 6091 if not var: 6092 break 6093 options.append(var.name) 6094 6095 return options 6096 6097 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6098 if match and not self._match(TokenType.REFERENCES): 6099 return None 6100 6101 expressions = None 6102 this = self._parse_table(schema=True) 6103 options = self._parse_key_constraint_options() 6104 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6105 6106 def _parse_foreign_key(self) -> exp.ForeignKey: 6107 expressions = ( 6108 self._parse_wrapped_id_vars() 6109 if not self._match(TokenType.REFERENCES, advance=False) 6110 else None 6111 ) 6112 reference = self._parse_references() 6113 on_options = {} 6114 6115 while self._match(TokenType.ON): 6116 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6117 self.raise_error("Expected DELETE or UPDATE") 6118 6119 kind = self._prev.text.lower() 6120 6121 if self._match_text_seq("NO", "ACTION"): 6122 action = "NO ACTION" 6123 elif self._match(TokenType.SET): 6124 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 6125 action = "SET " + self._prev.text.upper() 6126 else: 6127 self._advance() 6128 action = 
self._prev.text.upper() 6129 6130 on_options[kind] = action 6131 6132 return self.expression( 6133 exp.ForeignKey, 6134 expressions=expressions, 6135 reference=reference, 6136 options=self._parse_key_constraint_options(), 6137 **on_options, # type: ignore 6138 ) 6139 6140 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 6141 return self._parse_ordered() or self._parse_field() 6142 6143 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 6144 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 6145 self._retreat(self._index - 1) 6146 return None 6147 6148 id_vars = self._parse_wrapped_id_vars() 6149 return self.expression( 6150 exp.PeriodForSystemTimeConstraint, 6151 this=seq_get(id_vars, 0), 6152 expression=seq_get(id_vars, 1), 6153 ) 6154 6155 def _parse_primary_key( 6156 self, wrapped_optional: bool = False, in_props: bool = False 6157 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 6158 desc = ( 6159 self._match_set((TokenType.ASC, TokenType.DESC)) 6160 and self._prev.token_type == TokenType.DESC 6161 ) 6162 6163 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 6164 return self.expression( 6165 exp.PrimaryKeyColumnConstraint, 6166 desc=desc, 6167 options=self._parse_key_constraint_options(), 6168 ) 6169 6170 expressions = self._parse_wrapped_csv( 6171 self._parse_primary_key_part, optional=wrapped_optional 6172 ) 6173 options = self._parse_key_constraint_options() 6174 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 6175 6176 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6177 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 6178 6179 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6180 """ 6181 Parses a datetime column in ODBC format. We parse the column into the corresponding 6182 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 6183 same as we did for `DATE('yyyy-mm-dd')`. 
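Similarly, `{t'hh:mm:ss'}` and `{ts'yyyy-mm-dd hh:mm:ss'}` go through the same ODBC_DATETIME_LITERALS lookup to build the corresponding time and timestamp expressions (illustrative examples; the exact targets are whatever that mapping registers).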
6184 6185 Reference: 6186 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6187 """ 6188 self._match(TokenType.VAR) 6189 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6190 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6191 if not self._match(TokenType.R_BRACE): 6192 self.raise_error("Expected }") 6193 return expression 6194 6195 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6196 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6197 return this 6198 6199 bracket_kind = self._prev.token_type 6200 if ( 6201 bracket_kind == TokenType.L_BRACE 6202 and self._curr 6203 and self._curr.token_type == TokenType.VAR 6204 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6205 ): 6206 return self._parse_odbc_datetime_literal() 6207 6208 expressions = self._parse_csv( 6209 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6210 ) 6211 6212 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6213 self.raise_error("Expected ]") 6214 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6215 self.raise_error("Expected }") 6216 6217 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6218 if bracket_kind == TokenType.L_BRACE: 6219 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 6220 elif not this: 6221 this = build_array_constructor( 6222 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6223 ) 6224 else: 6225 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6226 if constructor_type: 6227 return build_array_constructor( 6228 constructor_type, 6229 args=expressions, 6230 bracket_kind=bracket_kind, 6231 dialect=self.dialect, 6232 ) 6233 6234 expressions = apply_index_offset( 6235 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6236 ) 6237 this = self.expression(exp.Bracket, this=this, expressions=expressions) 6238 6239 self._add_comments(this) 6240 return self._parse_bracket(this) 6241 6242 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6243 if self._match(TokenType.COLON): 6244 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6245 return this 6246 6247 def _parse_case(self) -> t.Optional[exp.Expression]: 6248 ifs = [] 6249 default = None 6250 6251 comments = self._prev_comments 6252 expression = self._parse_assignment() 6253 6254 while self._match(TokenType.WHEN): 6255 this = self._parse_assignment() 6256 self._match(TokenType.THEN) 6257 then = self._parse_assignment() 6258 ifs.append(self.expression(exp.If, this=this, true=then)) 6259 6260 if self._match(TokenType.ELSE): 6261 default = self._parse_assignment() 6262 6263 if not self._match(TokenType.END): 6264 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6265 default = exp.column("interval") 6266 else: 6267 self.raise_error("Expected END after CASE", self._prev) 6268 6269 return self.expression( 6270 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6271 ) 6272 6273 def _parse_if(self) -> t.Optional[exp.Expression]: 6274 if self._match(TokenType.L_PAREN): 6275 args = self._parse_csv( 6276 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6277 ) 6278 this = self.validate_expression(exp.If.from_arg_list(args), args) 6279 self._match_r_paren() 6280 
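# (illustrative) the branch above handled the function form IF(cond, true, false);
# the else-branch below handles the statement form IF cond THEN true [ELSE false] END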
else: 6281 index = self._index - 1 6282 6283 if self.NO_PAREN_IF_COMMANDS and index == 0: 6284 return self._parse_as_command(self._prev) 6285 6286 condition = self._parse_assignment() 6287 6288 if not condition: 6289 self._retreat(index) 6290 return None 6291 6292 self._match(TokenType.THEN) 6293 true = self._parse_assignment() 6294 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6295 self._match(TokenType.END) 6296 this = self.expression(exp.If, this=condition, true=true, false=false) 6297 6298 return this 6299 6300 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6301 if not self._match_text_seq("VALUE", "FOR"): 6302 self._retreat(self._index - 1) 6303 return None 6304 6305 return self.expression( 6306 exp.NextValueFor, 6307 this=self._parse_column(), 6308 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6309 ) 6310 6311 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6312 this = self._parse_function() or self._parse_var_or_string(upper=True) 6313 6314 if self._match(TokenType.FROM): 6315 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6316 6317 if not self._match(TokenType.COMMA): 6318 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6319 6320 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6321 6322 def _parse_gap_fill(self) -> exp.GapFill: 6323 self._match(TokenType.TABLE) 6324 this = self._parse_table() 6325 6326 self._match(TokenType.COMMA) 6327 args = [this, *self._parse_csv(self._parse_lambda)] 6328 6329 gap_fill = exp.GapFill.from_arg_list(args) 6330 return self.validate_expression(gap_fill, args) 6331 6332 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6333 this = self._parse_assignment() 6334 6335 if not self._match(TokenType.ALIAS): 6336 if self._match(TokenType.COMMA): 6337 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6338 6339 self.raise_error("Expected AS after CAST") 6340 6341 fmt = None 6342 to = self._parse_types() 6343 6344 default = self._match(TokenType.DEFAULT) 6345 if default: 6346 default = self._parse_bitwise() 6347 self._match_text_seq("ON", "CONVERSION", "ERROR") 6348 6349 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6350 fmt_string = self._parse_string() 6351 fmt = self._parse_at_time_zone(fmt_string) 6352 6353 if not to: 6354 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6355 if to.this in exp.DataType.TEMPORAL_TYPES: 6356 this = self.expression( 6357 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6358 this=this, 6359 format=exp.Literal.string( 6360 format_time( 6361 fmt_string.this if fmt_string else "", 6362 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6363 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6364 ) 6365 ), 6366 safe=safe, 6367 ) 6368 6369 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6370 this.set("zone", fmt.args["zone"]) 6371 return this 6372 elif not to: 6373 self.raise_error("Expected TYPE after CAST") 6374 elif isinstance(to, exp.Identifier): 6375 to = exp.DataType.build(to.name, udt=True) 6376 elif to.this == exp.DataType.Type.CHAR: 6377 if self._match(TokenType.CHARACTER_SET): 6378 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6379 6380 return self.expression( 6381 exp.Cast if strict else exp.TryCast, 6382 this=this, 6383 to=to, 6384 format=fmt, 6385 safe=safe, 6386 
action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6387 default=default, 6388 ) 6389 6390 def _parse_string_agg(self) -> exp.GroupConcat: 6391 if self._match(TokenType.DISTINCT): 6392 args: t.List[t.Optional[exp.Expression]] = [ 6393 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6394 ] 6395 if self._match(TokenType.COMMA): 6396 args.extend(self._parse_csv(self._parse_assignment)) 6397 else: 6398 args = self._parse_csv(self._parse_assignment) # type: ignore 6399 6400 if self._match_text_seq("ON", "OVERFLOW"): 6401 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6402 if self._match_text_seq("ERROR"): 6403 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6404 else: 6405 self._match_text_seq("TRUNCATE") 6406 on_overflow = self.expression( 6407 exp.OverflowTruncateBehavior, 6408 this=self._parse_string(), 6409 with_count=( 6410 self._match_text_seq("WITH", "COUNT") 6411 or not self._match_text_seq("WITHOUT", "COUNT") 6412 ), 6413 ) 6414 else: 6415 on_overflow = None 6416 6417 index = self._index 6418 if not self._match(TokenType.R_PAREN) and args: 6419 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6420 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6421 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6422 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6423 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6424 6425 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6426 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6427 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
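# e.g. (illustrative): Trino's LISTAGG(x, ',') WITHIN GROUP (ORDER BY y) and Postgres'
# STRING_AGG(x, ',' ORDER BY y) both end up as a GroupConcat whose `this` carries the
# ORDER BY, so either surface syntax can be produced again at generation time.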
6428 if not self._match_text_seq("WITHIN", "GROUP"): 6429 self._retreat(index) 6430 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6431 6432 # The corresponding match_r_paren will be called in parse_function (caller) 6433 self._match_l_paren() 6434 6435 return self.expression( 6436 exp.GroupConcat, 6437 this=self._parse_order(this=seq_get(args, 0)), 6438 separator=seq_get(args, 1), 6439 on_overflow=on_overflow, 6440 ) 6441 6442 def _parse_convert( 6443 self, strict: bool, safe: t.Optional[bool] = None 6444 ) -> t.Optional[exp.Expression]: 6445 this = self._parse_bitwise() 6446 6447 if self._match(TokenType.USING): 6448 to: t.Optional[exp.Expression] = self.expression( 6449 exp.CharacterSet, this=self._parse_var() 6450 ) 6451 elif self._match(TokenType.COMMA): 6452 to = self._parse_types() 6453 else: 6454 to = None 6455 6456 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 6457 6458 def _parse_xml_table(self) -> exp.XMLTable: 6459 namespaces = None 6460 passing = None 6461 columns = None 6462 6463 if self._match_text_seq("XMLNAMESPACES", "("): 6464 namespaces = self._parse_xml_namespace() 6465 self._match_text_seq(")", ",") 6466 6467 this = self._parse_string() 6468 6469 if self._match_text_seq("PASSING"): 6470 # The BY VALUE keywords are optional and are provided for semantic clarity 6471 self._match_text_seq("BY", "VALUE") 6472 passing = self._parse_csv(self._parse_column) 6473 6474 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6475 6476 if self._match_text_seq("COLUMNS"): 6477 columns = self._parse_csv(self._parse_field_def) 6478 6479 return self.expression( 6480 exp.XMLTable, 6481 this=this, 6482 namespaces=namespaces, 6483 passing=passing, 6484 columns=columns, 6485 by_ref=by_ref, 6486 ) 6487 6488 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6489 namespaces = [] 6490 6491 while True: 6492 if self._match(TokenType.DEFAULT): 6493 uri = self._parse_string() 6494 else: 6495 uri = self._parse_alias(self._parse_string()) 6496 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6497 if not self._match(TokenType.COMMA): 6498 break 6499 6500 return namespaces 6501 6502 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 6503 """ 6504 There are generally two variants of the DECODE function: 6505 6506 - DECODE(bin, charset) 6507 - DECODE(expression, search, result [, search, result] ... [, default]) 6508 6509 The second variant will always be parsed into a CASE expression. Note that NULL 6510 needs special treatment, since we need to explicitly check for it with `IS NULL`, 6511 instead of relying on pattern matching. 
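Illustrative example of the second variant: DECODE(x, 1, 'one', NULL, 'none', 'other')
is parsed as CASE WHEN x = 1 THEN 'one' WHEN x IS NULL THEN 'none' ELSE 'other' END.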
6512 """ 6513 args = self._parse_csv(self._parse_assignment) 6514 6515 if len(args) < 3: 6516 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6517 6518 expression, *expressions = args 6519 if not expression: 6520 return None 6521 6522 ifs = [] 6523 for search, result in zip(expressions[::2], expressions[1::2]): 6524 if not search or not result: 6525 return None 6526 6527 if isinstance(search, exp.Literal): 6528 ifs.append( 6529 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 6530 ) 6531 elif isinstance(search, exp.Null): 6532 ifs.append( 6533 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 6534 ) 6535 else: 6536 cond = exp.or_( 6537 exp.EQ(this=expression.copy(), expression=search), 6538 exp.and_( 6539 exp.Is(this=expression.copy(), expression=exp.Null()), 6540 exp.Is(this=search.copy(), expression=exp.Null()), 6541 copy=False, 6542 ), 6543 copy=False, 6544 ) 6545 ifs.append(exp.If(this=cond, true=result)) 6546 6547 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6548 6549 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6550 self._match_text_seq("KEY") 6551 key = self._parse_column() 6552 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6553 self._match_text_seq("VALUE") 6554 value = self._parse_bitwise() 6555 6556 if not key and not value: 6557 return None 6558 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6559 6560 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6561 if not this or not self._match_text_seq("FORMAT", "JSON"): 6562 return this 6563 6564 return self.expression(exp.FormatJson, this=this) 6565 6566 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6567 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6568 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6569 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6570 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6571 else: 6572 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6573 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6574 6575 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6576 6577 if not empty and not error and not null: 6578 return None 6579 6580 return self.expression( 6581 exp.OnCondition, 6582 empty=empty, 6583 error=error, 6584 null=null, 6585 ) 6586 6587 def _parse_on_handling( 6588 self, on: str, *values: str 6589 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6590 # Parses the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6591 for value in values: 6592 if self._match_text_seq(value, "ON", on): 6593 return f"{value} ON {on}" 6594 6595 index = self._index 6596 if self._match(TokenType.DEFAULT): 6597 default_value = self._parse_bitwise() 6598 if self._match_text_seq("ON", on): 6599 return default_value 6600 6601 self._retreat(index) 6602 6603 return None 6604 6605 @t.overload 6606 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6607 6608 @t.overload 6609 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...
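# e.g. (illustrative), in a dialect where ':' is the key/value separator:
#   JSON_OBJECT('a': 1 ABSENT ON NULL WITH UNIQUE KEYS)
# parses into exp.JSONObject with null_handling='ABSENT ON NULL' and unique_keys=True;
# with agg=True the same shape builds exp.JSONObjectAgg instead.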
6610 6611 def _parse_json_object(self, agg=False): 6612 star = self._parse_star() 6613 expressions = ( 6614 [star] 6615 if star 6616 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6617 ) 6618 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6619 6620 unique_keys = None 6621 if self._match_text_seq("WITH", "UNIQUE"): 6622 unique_keys = True 6623 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6624 unique_keys = False 6625 6626 self._match_text_seq("KEYS") 6627 6628 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6629 self._parse_type() 6630 ) 6631 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6632 6633 return self.expression( 6634 exp.JSONObjectAgg if agg else exp.JSONObject, 6635 expressions=expressions, 6636 null_handling=null_handling, 6637 unique_keys=unique_keys, 6638 return_type=return_type, 6639 encoding=encoding, 6640 ) 6641 6642 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6643 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6644 if not self._match_text_seq("NESTED"): 6645 this = self._parse_id_var() 6646 kind = self._parse_types(allow_identifiers=False) 6647 nested = None 6648 else: 6649 this = None 6650 kind = None 6651 nested = True 6652 6653 path = self._match_text_seq("PATH") and self._parse_string() 6654 nested_schema = nested and self._parse_json_schema() 6655 6656 return self.expression( 6657 exp.JSONColumnDef, 6658 this=this, 6659 kind=kind, 6660 path=path, 6661 nested_schema=nested_schema, 6662 ) 6663 6664 def _parse_json_schema(self) -> exp.JSONSchema: 6665 self._match_text_seq("COLUMNS") 6666 return self.expression( 6667 exp.JSONSchema, 6668 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6669 ) 6670 6671 def _parse_json_table(self) -> exp.JSONTable: 6672 this = self._parse_format_json(self._parse_bitwise()) 6673 path = self._match(TokenType.COMMA) and self._parse_string() 6674 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6675 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6676 schema = self._parse_json_schema() 6677 6678 return exp.JSONTable( 6679 this=this, 6680 schema=schema, 6681 path=path, 6682 error_handling=error_handling, 6683 empty_handling=empty_handling, 6684 ) 6685 6686 def _parse_match_against(self) -> exp.MatchAgainst: 6687 expressions = self._parse_csv(self._parse_column) 6688 6689 self._match_text_seq(")", "AGAINST", "(") 6690 6691 this = self._parse_string() 6692 6693 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6694 modifier = "IN NATURAL LANGUAGE MODE" 6695 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6696 modifier = f"{modifier} WITH QUERY EXPANSION" 6697 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6698 modifier = "IN BOOLEAN MODE" 6699 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6700 modifier = "WITH QUERY EXPANSION" 6701 else: 6702 modifier = None 6703 6704 return self.expression( 6705 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6706 ) 6707 6708 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6709 def _parse_open_json(self) -> exp.OpenJSON: 6710 this = self._parse_bitwise() 6711 path = self._match(TokenType.COMMA) and self._parse_string() 6712 6713 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6714 this = self._parse_field(any_token=True) 6715 kind = self._parse_types() 6716 path = 
self._parse_string() 6717 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6718 6719 return self.expression( 6720 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6721 ) 6722 6723 expressions = None 6724 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6725 self._match_l_paren() 6726 expressions = self._parse_csv(_parse_open_json_column_def) 6727 6728 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6729 6730 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6731 args = self._parse_csv(self._parse_bitwise) 6732 6733 if self._match(TokenType.IN): 6734 return self.expression( 6735 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6736 ) 6737 6738 if haystack_first: 6739 haystack = seq_get(args, 0) 6740 needle = seq_get(args, 1) 6741 else: 6742 haystack = seq_get(args, 1) 6743 needle = seq_get(args, 0) 6744 6745 return self.expression( 6746 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6747 ) 6748 6749 def _parse_predict(self) -> exp.Predict: 6750 self._match_text_seq("MODEL") 6751 this = self._parse_table() 6752 6753 self._match(TokenType.COMMA) 6754 self._match_text_seq("TABLE") 6755 6756 return self.expression( 6757 exp.Predict, 6758 this=this, 6759 expression=self._parse_table(), 6760 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6761 ) 6762 6763 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6764 args = self._parse_csv(self._parse_table) 6765 return exp.JoinHint(this=func_name.upper(), expressions=args) 6766 6767 def _parse_substring(self) -> exp.Substring: 6768 # Postgres supports the form: substring(string [from int] [for int]) 6769 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6770 6771 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6772 6773 if self._match(TokenType.FROM): 6774 args.append(self._parse_bitwise()) 6775 if self._match(TokenType.FOR): 6776 if len(args) == 1: 6777 args.append(exp.Literal.number(1)) 6778 args.append(self._parse_bitwise()) 6779 6780 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6781 6782 def _parse_trim(self) -> exp.Trim: 6783 # https://www.w3resource.com/sql/character-functions/trim.php 6784 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6785 6786 position = None 6787 collation = None 6788 expression = None 6789 6790 if self._match_texts(self.TRIM_TYPES): 6791 position = self._prev.text.upper() 6792 6793 this = self._parse_bitwise() 6794 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6795 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6796 expression = self._parse_bitwise() 6797 6798 if invert_order: 6799 this, expression = expression, this 6800 6801 if self._match(TokenType.COLLATE): 6802 collation = self._parse_bitwise() 6803 6804 return self.expression( 6805 exp.Trim, this=this, position=position, expression=expression, collation=collation 6806 ) 6807 6808 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6809 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6810 6811 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6812 return self._parse_window(self._parse_id_var(), alias=True) 6813 6814 def _parse_respect_or_ignore_nulls( 6815 self, this: t.Optional[exp.Expression] 6816 ) -> t.Optional[exp.Expression]: 6817 if self._match_text_seq("IGNORE", "NULLS"): 
6818 return self.expression(exp.IgnoreNulls, this=this) 6819 if self._match_text_seq("RESPECT", "NULLS"): 6820 return self.expression(exp.RespectNulls, this=this) 6821 return this 6822 6823 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6824 if self._match(TokenType.HAVING): 6825 self._match_texts(("MAX", "MIN")) 6826 max = self._prev.text.upper() != "MIN" 6827 return self.expression( 6828 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6829 ) 6830 6831 return this 6832 6833 def _parse_window( 6834 self, this: t.Optional[exp.Expression], alias: bool = False 6835 ) -> t.Optional[exp.Expression]: 6836 func = this 6837 comments = func.comments if isinstance(func, exp.Expression) else None 6838 6839 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6840 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6841 if self._match_text_seq("WITHIN", "GROUP"): 6842 order = self._parse_wrapped(self._parse_order) 6843 this = self.expression(exp.WithinGroup, this=this, expression=order) 6844 6845 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6846 self._match(TokenType.WHERE) 6847 this = self.expression( 6848 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6849 ) 6850 self._match_r_paren() 6851 6852 # The SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] before OVER. 6853 # Some dialects choose to implement it and some do not. 6854 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6855 6856 # There is some code above in _parse_lambda that handles 6857 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6858 6859 # The code below handles 6860 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6861 6862 # Oracle allows both formats 6863 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6864 # and Snowflake chose to do the same for familiarity 6865 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6866 if isinstance(this, exp.AggFunc): 6867 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6868 6869 if ignore_respect and ignore_respect is not this: 6870 ignore_respect.replace(ignore_respect.this) 6871 this = self.expression(ignore_respect.__class__, this=this) 6872 6873 this = self._parse_respect_or_ignore_nulls(this) 6874 6875 # BigQuery: SELECT ... FROM ... WINDOW x AS (PARTITION BY ...)
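# When `alias` is True we are parsing a named window from a WINDOW clause, e.g.
# SELECT SUM(x) OVER w FROM t WINDOW w AS (PARTITION BY y), so an optional AS is
# consumed and no OVER-style keyword is required.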
6876 if alias: 6877 over = None 6878 self._match(TokenType.ALIAS) 6879 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6880 return this 6881 else: 6882 over = self._prev.text.upper() 6883 6884 if comments and isinstance(func, exp.Expression): 6885 func.pop_comments() 6886 6887 if not self._match(TokenType.L_PAREN): 6888 return self.expression( 6889 exp.Window, 6890 comments=comments, 6891 this=this, 6892 alias=self._parse_id_var(False), 6893 over=over, 6894 ) 6895 6896 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6897 6898 first = self._match(TokenType.FIRST) 6899 if self._match_text_seq("LAST"): 6900 first = False 6901 6902 partition, order = self._parse_partition_and_order() 6903 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6904 6905 if kind: 6906 self._match(TokenType.BETWEEN) 6907 start = self._parse_window_spec() 6908 self._match(TokenType.AND) 6909 end = self._parse_window_spec() 6910 exclude = ( 6911 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 6912 if self._match_text_seq("EXCLUDE") 6913 else None 6914 ) 6915 6916 spec = self.expression( 6917 exp.WindowSpec, 6918 kind=kind, 6919 start=start["value"], 6920 start_side=start["side"], 6921 end=end["value"], 6922 end_side=end["side"], 6923 exclude=exclude, 6924 ) 6925 else: 6926 spec = None 6927 6928 self._match_r_paren() 6929 6930 window = self.expression( 6931 exp.Window, 6932 comments=comments, 6933 this=this, 6934 partition_by=partition, 6935 order=order, 6936 spec=spec, 6937 alias=window_alias, 6938 over=over, 6939 first=first, 6940 ) 6941 6942 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6943 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6944 return self._parse_window(window, alias=alias) 6945 6946 return window 6947 6948 def _parse_partition_and_order( 6949 self, 6950 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6951 return self._parse_partition_by(), self._parse_order() 6952 6953 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6954 self._match(TokenType.BETWEEN) 6955 6956 return { 6957 "value": ( 6958 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6959 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6960 or self._parse_bitwise() 6961 ), 6962 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6963 } 6964 6965 def _parse_alias( 6966 self, this: t.Optional[exp.Expression], explicit: bool = False 6967 ) -> t.Optional[exp.Expression]: 6968 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 6969 # so this section tries to parse the clause version and if it fails, it treats the token 6970 # as an identifier (alias) 6971 if self._can_parse_limit_or_offset(): 6972 return this 6973 6974 any_token = self._match(TokenType.ALIAS) 6975 comments = self._prev_comments or [] 6976 6977 if explicit and not any_token: 6978 return this 6979 6980 if self._match(TokenType.L_PAREN): 6981 aliases = self.expression( 6982 exp.Aliases, 6983 comments=comments, 6984 this=this, 6985 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6986 ) 6987 self._match_r_paren(aliases) 6988 return aliases 6989 6990 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6991 self.STRING_ALIASES and self._parse_string_as_identifier() 6992 ) 6993 6994 if alias: 6995 comments.extend(alias.pop_comments()) 6996 this = self.expression(exp.Alias, comments=comments, this=this, 
alias=alias) 6997 column = this.this 6998 6999 # Moves the comment next to the alias in `expr /* comment */ AS alias` 7000 if not this.comments and column and column.comments: 7001 this.comments = column.pop_comments() 7002 7003 return this 7004 7005 def _parse_id_var( 7006 self, 7007 any_token: bool = True, 7008 tokens: t.Optional[t.Collection[TokenType]] = None, 7009 ) -> t.Optional[exp.Expression]: 7010 expression = self._parse_identifier() 7011 if not expression and ( 7012 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 7013 ): 7014 quoted = self._prev.token_type == TokenType.STRING 7015 expression = self._identifier_expression(quoted=quoted) 7016 7017 return expression 7018 7019 def _parse_string(self) -> t.Optional[exp.Expression]: 7020 if self._match_set(self.STRING_PARSERS): 7021 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 7022 return self._parse_placeholder() 7023 7024 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 7025 output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 7026 if output: 7027 output.update_positions(self._prev) 7028 return output 7029 7030 def _parse_number(self) -> t.Optional[exp.Expression]: 7031 if self._match_set(self.NUMERIC_PARSERS): 7032 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 7033 return self._parse_placeholder() 7034 7035 def _parse_identifier(self) -> t.Optional[exp.Expression]: 7036 if self._match(TokenType.IDENTIFIER): 7037 return self._identifier_expression(quoted=True) 7038 return self._parse_placeholder() 7039 7040 def _parse_var( 7041 self, 7042 any_token: bool = False, 7043 tokens: t.Optional[t.Collection[TokenType]] = None, 7044 upper: bool = False, 7045 ) -> t.Optional[exp.Expression]: 7046 if ( 7047 (any_token and self._advance_any()) 7048 or self._match(TokenType.VAR) 7049 or (self._match_set(tokens) if tokens else False) 7050 ): 7051 return self.expression( 7052 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7053 ) 7054 return self._parse_placeholder() 7055 7056 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 7057 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7058 self._advance() 7059 return self._prev 7060 return None 7061 7062 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7063 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7064 7065 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7066 return self._parse_primary() or self._parse_var(any_token=True) 7067 7068 def _parse_null(self) -> t.Optional[exp.Expression]: 7069 if self._match_set(self.NULL_TOKENS): 7070 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7071 return self._parse_placeholder() 7072 7073 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7074 if self._match(TokenType.TRUE): 7075 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7076 if self._match(TokenType.FALSE): 7077 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7078 return self._parse_placeholder() 7079 7080 def _parse_star(self) -> t.Optional[exp.Expression]: 7081 if self._match(TokenType.STAR): 7082 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7083 return self._parse_placeholder() 7084 7085 def _parse_parameter(self) -> exp.Parameter: 7086 this = self._parse_identifier() or self._parse_primary_or_var() 7087 return 
self.expression(exp.Parameter, this=this) 7088 7089 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7090 if self._match_set(self.PLACEHOLDER_PARSERS): 7091 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7092 if placeholder: 7093 return placeholder 7094 self._advance(-1) 7095 return None 7096 7097 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7098 if not self._match_texts(keywords): 7099 return None 7100 if self._match(TokenType.L_PAREN, advance=False): 7101 return self._parse_wrapped_csv(self._parse_expression) 7102 7103 expression = self._parse_expression() 7104 return [expression] if expression else None 7105 7106 def _parse_csv( 7107 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7108 ) -> t.List[exp.Expression]: 7109 parse_result = parse_method() 7110 items = [parse_result] if parse_result is not None else [] 7111 7112 while self._match(sep): 7113 self._add_comments(parse_result) 7114 parse_result = parse_method() 7115 if parse_result is not None: 7116 items.append(parse_result) 7117 7118 return items 7119 7120 def _parse_tokens( 7121 self, parse_method: t.Callable, expressions: t.Dict 7122 ) -> t.Optional[exp.Expression]: 7123 this = parse_method() 7124 7125 while self._match_set(expressions): 7126 this = self.expression( 7127 expressions[self._prev.token_type], 7128 this=this, 7129 comments=self._prev_comments, 7130 expression=parse_method(), 7131 ) 7132 7133 return this 7134 7135 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7136 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7137 7138 def _parse_wrapped_csv( 7139 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 7140 ) -> t.List[exp.Expression]: 7141 return self._parse_wrapped( 7142 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7143 ) 7144 7145 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7146 wrapped = self._match(TokenType.L_PAREN) 7147 if not wrapped and not optional: 7148 self.raise_error("Expecting (") 7149 parse_result = parse_method() 7150 if wrapped: 7151 self._match_r_paren() 7152 return parse_result 7153 7154 def _parse_expressions(self) -> t.List[exp.Expression]: 7155 return self._parse_csv(self._parse_expression) 7156 7157 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7158 return self._parse_select() or self._parse_set_operations( 7159 self._parse_alias(self._parse_assignment(), explicit=True) 7160 if alias 7161 else self._parse_assignment() 7162 ) 7163 7164 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7165 return self._parse_query_modifiers( 7166 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7167 ) 7168 7169 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7170 this = None 7171 if self._match_texts(self.TRANSACTION_KIND): 7172 this = self._prev.text 7173 7174 self._match_texts(("TRANSACTION", "WORK")) 7175 7176 modes = [] 7177 while True: 7178 mode = [] 7179 while self._match(TokenType.VAR): 7180 mode.append(self._prev.text) 7181 7182 if mode: 7183 modes.append(" ".join(mode)) 7184 if not self._match(TokenType.COMMA): 7185 break 7186 7187 return self.expression(exp.Transaction, this=this, modes=modes) 7188 7189 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7190 chain = None 7191 savepoint = None 7192 is_rollback = self._prev.token_type == 
TokenType.ROLLBACK 7193 7194 self._match_texts(("TRANSACTION", "WORK")) 7195 7196 if self._match_text_seq("TO"): 7197 self._match_text_seq("SAVEPOINT") 7198 savepoint = self._parse_id_var() 7199 7200 if self._match(TokenType.AND): 7201 chain = not self._match_text_seq("NO") 7202 self._match_text_seq("CHAIN") 7203 7204 if is_rollback: 7205 return self.expression(exp.Rollback, savepoint=savepoint) 7206 7207 return self.expression(exp.Commit, chain=chain) 7208 7209 def _parse_refresh(self) -> exp.Refresh: 7210 self._match(TokenType.TABLE) 7211 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7212 7213 def _parse_add_column(self) -> t.Optional[exp.Expression]: 7214 if not self._match_text_seq("ADD"): 7215 return None 7216 7217 self._match(TokenType.COLUMN) 7218 exists_column = self._parse_exists(not_=True) 7219 expression = self._parse_field_def() 7220 7221 if expression: 7222 expression.set("exists", exists_column) 7223 7224 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7225 if self._match_texts(("FIRST", "AFTER")): 7226 position = self._prev.text 7227 column_position = self.expression( 7228 exp.ColumnPosition, this=self._parse_column(), position=position 7229 ) 7230 expression.set("position", column_position) 7231 7232 return expression 7233 7234 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7235 drop = self._match(TokenType.DROP) and self._parse_drop() 7236 if drop and not isinstance(drop, exp.Command): 7237 drop.set("kind", drop.args.get("kind", "COLUMN")) 7238 return drop 7239 7240 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7241 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7242 return self.expression( 7243 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7244 ) 7245 7246 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7247 index = self._index - 1 7248 7249 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7250 return self._parse_csv( 7251 lambda: self.expression( 7252 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7253 ) 7254 ) 7255 7256 self._retreat(index) 7257 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 7258 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 7259 7260 if self._match_text_seq("ADD", "COLUMNS"): 7261 schema = self._parse_schema() 7262 if schema: 7263 return [schema] 7264 return [] 7265 7266 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 7267 7268 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7269 if self._match_texts(self.ALTER_ALTER_PARSERS): 7270 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7271 7272 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7273 # keyword after ALTER we default to parsing this statement 7274 self._match(TokenType.COLUMN) 7275 column = self._parse_field(any_token=True) 7276 7277 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7278 return self.expression(exp.AlterColumn, this=column, drop=True) 7279 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7280 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7281 if self._match(TokenType.COMMENT): 7282 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7283 if self._match_text_seq("DROP", "NOT", "NULL"): 7284 return 
self.expression( 7285 exp.AlterColumn, 7286 this=column, 7287 drop=True, 7288 allow_null=True, 7289 ) 7290 if self._match_text_seq("SET", "NOT", "NULL"): 7291 return self.expression( 7292 exp.AlterColumn, 7293 this=column, 7294 allow_null=False, 7295 ) 7296 7297 if self._match_text_seq("SET", "VISIBLE"): 7298 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7299 if self._match_text_seq("SET", "INVISIBLE"): 7300 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7301 7302 self._match_text_seq("SET", "DATA") 7303 self._match_text_seq("TYPE") 7304 return self.expression( 7305 exp.AlterColumn, 7306 this=column, 7307 dtype=self._parse_types(), 7308 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7309 using=self._match(TokenType.USING) and self._parse_assignment(), 7310 ) 7311 7312 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7313 if self._match_texts(("ALL", "EVEN", "AUTO")): 7314 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7315 7316 self._match_text_seq("KEY", "DISTKEY") 7317 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7318 7319 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7320 if compound: 7321 self._match_text_seq("SORTKEY") 7322 7323 if self._match(TokenType.L_PAREN, advance=False): 7324 return self.expression( 7325 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7326 ) 7327 7328 self._match_texts(("AUTO", "NONE")) 7329 return self.expression( 7330 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7331 ) 7332 7333 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7334 index = self._index - 1 7335 7336 partition_exists = self._parse_exists() 7337 if self._match(TokenType.PARTITION, advance=False): 7338 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7339 7340 self._retreat(index) 7341 return self._parse_csv(self._parse_drop_column) 7342 7343 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7344 if self._match(TokenType.COLUMN): 7345 exists = self._parse_exists() 7346 old_column = self._parse_column() 7347 to = self._match_text_seq("TO") 7348 new_column = self._parse_column() 7349 7350 if old_column is None or to is None or new_column is None: 7351 return None 7352 7353 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7354 7355 self._match_text_seq("TO") 7356 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7357 7358 def _parse_alter_table_set(self) -> exp.AlterSet: 7359 alter_set = self.expression(exp.AlterSet) 7360 7361 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7362 "TABLE", "PROPERTIES" 7363 ): 7364 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7365 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7366 alter_set.set("expressions", [self._parse_assignment()]) 7367 elif self._match_texts(("LOGGED", "UNLOGGED")): 7368 alter_set.set("option", exp.var(self._prev.text.upper())) 7369 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7370 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7371 elif self._match_text_seq("LOCATION"): 7372 alter_set.set("location", self._parse_field()) 7373 elif self._match_text_seq("ACCESS", "METHOD"): 7374 alter_set.set("access_method", self._parse_field()) 7375 
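# The remaining branches cover storage and metadata variants of ALTER ... SET:
# TABLESPACE (e.g. Postgres' ALTER TABLE t SET TABLESPACE new_ts), Snowflake-style
# stage file/copy options, tags, and finally a SERDE/properties fallback.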
elif self._match_text_seq("TABLESPACE"): 7376 alter_set.set("tablespace", self._parse_field()) 7377 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7378 alter_set.set("file_format", [self._parse_field()]) 7379 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7380 alter_set.set("file_format", self._parse_wrapped_options()) 7381 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7382 alter_set.set("copy_options", self._parse_wrapped_options()) 7383 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7384 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7385 else: 7386 if self._match_text_seq("SERDE"): 7387 alter_set.set("serde", self._parse_field()) 7388 7389 alter_set.set("expressions", [self._parse_properties()]) 7390 7391 return alter_set 7392 7393 def _parse_alter(self) -> exp.Alter | exp.Command: 7394 start = self._prev 7395 7396 alter_token = self._match_set(self.ALTERABLES) and self._prev 7397 if not alter_token: 7398 return self._parse_as_command(start) 7399 7400 exists = self._parse_exists() 7401 only = self._match_text_seq("ONLY") 7402 this = self._parse_table(schema=True) 7403 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7404 7405 if self._next: 7406 self._advance() 7407 7408 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7409 if parser: 7410 actions = ensure_list(parser(self)) 7411 not_valid = self._match_text_seq("NOT", "VALID") 7412 options = self._parse_csv(self._parse_property) 7413 7414 if not self._curr and actions: 7415 return self.expression( 7416 exp.Alter, 7417 this=this, 7418 kind=alter_token.text.upper(), 7419 exists=exists, 7420 actions=actions, 7421 only=only, 7422 options=options, 7423 cluster=cluster, 7424 not_valid=not_valid, 7425 ) 7426 7427 return self._parse_as_command(start) 7428 7429 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7430 start = self._prev 7431 # https://duckdb.org/docs/sql/statements/analyze 7432 if not self._curr: 7433 return self.expression(exp.Analyze) 7434 7435 options = [] 7436 while self._match_texts(self.ANALYZE_STYLES): 7437 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7438 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7439 else: 7440 options.append(self._prev.text.upper()) 7441 7442 this: t.Optional[exp.Expression] = None 7443 inner_expression: t.Optional[exp.Expression] = None 7444 7445 kind = self._curr and self._curr.text.upper() 7446 7447 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7448 this = self._parse_table_parts() 7449 elif self._match_text_seq("TABLES"): 7450 if self._match_set((TokenType.FROM, TokenType.IN)): 7451 kind = f"{kind} {self._prev.text.upper()}" 7452 this = self._parse_table(schema=True, is_db_reference=True) 7453 elif self._match_text_seq("DATABASE"): 7454 this = self._parse_table(schema=True, is_db_reference=True) 7455 elif self._match_text_seq("CLUSTER"): 7456 this = self._parse_table() 7457 # Try matching inner expr keywords before fallback to parse table. 
7458 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7459 kind = None 7460 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7461 else: 7462 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7463 kind = None 7464 this = self._parse_table_parts() 7465 7466 partition = self._try_parse(self._parse_partition) 7467 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7468 return self._parse_as_command(start) 7469 7470 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7471 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7472 "WITH", "ASYNC", "MODE" 7473 ): 7474 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 7475 else: 7476 mode = None 7477 7478 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7479 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7480 7481 properties = self._parse_properties() 7482 return self.expression( 7483 exp.Analyze, 7484 kind=kind, 7485 this=this, 7486 mode=mode, 7487 partition=partition, 7488 properties=properties, 7489 expression=inner_expression, 7490 options=options, 7491 ) 7492 7493 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7494 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7495 this = None 7496 kind = self._prev.text.upper() 7497 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7498 expressions = [] 7499 7500 if not self._match_text_seq("STATISTICS"): 7501 self.raise_error("Expecting token STATISTICS") 7502 7503 if self._match_text_seq("NOSCAN"): 7504 this = "NOSCAN" 7505 elif self._match(TokenType.FOR): 7506 if self._match_text_seq("ALL", "COLUMNS"): 7507 this = "FOR ALL COLUMNS" 7508 if self._match_texts("COLUMNS"): 7509 this = "FOR COLUMNS" 7510 expressions = self._parse_csv(self._parse_column_reference) 7511 elif self._match_text_seq("SAMPLE"): 7512 sample = self._parse_number() 7513 expressions = [ 7514 self.expression( 7515 exp.AnalyzeSample, 7516 sample=sample, 7517 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7518 ) 7519 ] 7520 7521 return self.expression( 7522 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7523 ) 7524 7525 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7526 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7527 kind = None 7528 this = None 7529 expression: t.Optional[exp.Expression] = None 7530 if self._match_text_seq("REF", "UPDATE"): 7531 kind = "REF" 7532 this = "UPDATE" 7533 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7534 this = "UPDATE SET DANGLING TO NULL" 7535 elif self._match_text_seq("STRUCTURE"): 7536 kind = "STRUCTURE" 7537 if self._match_text_seq("CASCADE", "FAST"): 7538 this = "CASCADE FAST" 7539 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7540 ("ONLINE", "OFFLINE") 7541 ): 7542 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7543 expression = self._parse_into() 7544 7545 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7546 7547 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7548 this = self._prev.text.upper() 7549 if self._match_text_seq("COLUMNS"): 7550 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7551 return None 7552 7553 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7554 kind = 
self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7555 if self._match_text_seq("STATISTICS"): 7556 return self.expression(exp.AnalyzeDelete, kind=kind) 7557 return None 7558 7559 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7560 if self._match_text_seq("CHAINED", "ROWS"): 7561 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7562 return None 7563 7564 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7565 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7566 this = self._prev.text.upper() 7567 expression: t.Optional[exp.Expression] = None 7568 expressions = [] 7569 update_options = None 7570 7571 if self._match_text_seq("HISTOGRAM", "ON"): 7572 expressions = self._parse_csv(self._parse_column_reference) 7573 with_expressions = [] 7574 while self._match(TokenType.WITH): 7575 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7576 if self._match_texts(("SYNC", "ASYNC")): 7577 if self._match_text_seq("MODE", advance=False): 7578 with_expressions.append(f"{self._prev.text.upper()} MODE") 7579 self._advance() 7580 else: 7581 buckets = self._parse_number() 7582 if self._match_text_seq("BUCKETS"): 7583 with_expressions.append(f"{buckets} BUCKETS") 7584 if with_expressions: 7585 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7586 7587 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7588 TokenType.UPDATE, advance=False 7589 ): 7590 update_options = self._prev.text.upper() 7591 self._advance() 7592 elif self._match_text_seq("USING", "DATA"): 7593 expression = self.expression(exp.UsingData, this=self._parse_string()) 7594 7595 return self.expression( 7596 exp.AnalyzeHistogram, 7597 this=this, 7598 expressions=expressions, 7599 expression=expression, 7600 update_options=update_options, 7601 ) 7602 7603 def _parse_merge(self) -> exp.Merge: 7604 self._match(TokenType.INTO) 7605 target = self._parse_table() 7606 7607 if target and self._match(TokenType.ALIAS, advance=False): 7608 target.set("alias", self._parse_table_alias()) 7609 7610 self._match(TokenType.USING) 7611 using = self._parse_table() 7612 7613 self._match(TokenType.ON) 7614 on = self._parse_assignment() 7615 7616 return self.expression( 7617 exp.Merge, 7618 this=target, 7619 using=using, 7620 on=on, 7621 whens=self._parse_when_matched(), 7622 returning=self._parse_returning(), 7623 ) 7624 7625 def _parse_when_matched(self) -> exp.Whens: 7626 whens = [] 7627 7628 while self._match(TokenType.WHEN): 7629 matched = not self._match(TokenType.NOT) 7630 self._match_text_seq("MATCHED") 7631 source = ( 7632 False 7633 if self._match_text_seq("BY", "TARGET") 7634 else self._match_text_seq("BY", "SOURCE") 7635 ) 7636 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7637 7638 self._match(TokenType.THEN) 7639 7640 if self._match(TokenType.INSERT): 7641 this = self._parse_star() 7642 if this: 7643 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7644 else: 7645 then = self.expression( 7646 exp.Insert, 7647 this=exp.var("ROW") 7648 if self._match_text_seq("ROW") 7649 else self._parse_value(values=False), 7650 expression=self._match_text_seq("VALUES") and self._parse_value(), 7651 ) 7652 elif self._match(TokenType.UPDATE): 7653 expressions = self._parse_star() 7654 if expressions: 7655 then = self.expression(exp.Update, expressions=expressions) 7656 else: 7657 then = self.expression( 7658 exp.Update, 7659 
expressions=self._match(TokenType.SET) 7660 and self._parse_csv(self._parse_equality), 7661 ) 7662 elif self._match(TokenType.DELETE): 7663 then = self.expression(exp.Var, this=self._prev.text) 7664 else: 7665 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7666 7667 whens.append( 7668 self.expression( 7669 exp.When, 7670 matched=matched, 7671 source=source, 7672 condition=condition, 7673 then=then, 7674 ) 7675 ) 7676 return self.expression(exp.Whens, expressions=whens) 7677 7678 def _parse_show(self) -> t.Optional[exp.Expression]: 7679 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7680 if parser: 7681 return parser(self) 7682 return self._parse_as_command(self._prev) 7683 7684 def _parse_set_item_assignment( 7685 self, kind: t.Optional[str] = None 7686 ) -> t.Optional[exp.Expression]: 7687 index = self._index 7688 7689 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7690 return self._parse_set_transaction(global_=kind == "GLOBAL") 7691 7692 left = self._parse_primary() or self._parse_column() 7693 assignment_delimiter = self._match_texts(("=", "TO")) 7694 7695 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7696 self._retreat(index) 7697 return None 7698 7699 right = self._parse_statement() or self._parse_id_var() 7700 if isinstance(right, (exp.Column, exp.Identifier)): 7701 right = exp.var(right.name) 7702 7703 this = self.expression(exp.EQ, this=left, expression=right) 7704 return self.expression(exp.SetItem, this=this, kind=kind) 7705 7706 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7707 self._match_text_seq("TRANSACTION") 7708 characteristics = self._parse_csv( 7709 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7710 ) 7711 return self.expression( 7712 exp.SetItem, 7713 expressions=characteristics, 7714 kind="TRANSACTION", 7715 **{"global": global_}, # type: ignore 7716 ) 7717 7718 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7719 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7720 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7721 7722 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7723 index = self._index 7724 set_ = self.expression( 7725 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7726 ) 7727 7728 if self._curr: 7729 self._retreat(index) 7730 return self._parse_as_command(self._prev) 7731 7732 return set_ 7733 7734 def _parse_var_from_options( 7735 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7736 ) -> t.Optional[exp.Var]: 7737 start = self._curr 7738 if not start: 7739 return None 7740 7741 option = start.text.upper() 7742 continuations = options.get(option) 7743 7744 index = self._index 7745 self._advance() 7746 for keywords in continuations or []: 7747 if isinstance(keywords, str): 7748 keywords = (keywords,) 7749 7750 if self._match_text_seq(*keywords): 7751 option = f"{option} {' '.join(keywords)}" 7752 break 7753 else: 7754 if continuations or continuations is None: 7755 if raise_unmatched: 7756 self.raise_error(f"Unknown option {option}") 7757 7758 self._retreat(index) 7759 return None 7760 7761 return exp.var(option) 7762 7763 def _parse_as_command(self, start: Token) -> exp.Command: 7764 while self._curr: 7765 self._advance() 7766 text = self._find_sql(start, self._prev) 7767 size = len(start.text) 7768 self._warn_unsupported() 7769 return exp.Command(this=text[:size], 
expression=text[size:]) 7770 7771 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7772 settings = [] 7773 7774 self._match_l_paren() 7775 kind = self._parse_id_var() 7776 7777 if self._match(TokenType.L_PAREN): 7778 while True: 7779 key = self._parse_id_var() 7780 value = self._parse_primary() 7781 if not key and value is None: 7782 break 7783 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7784 self._match(TokenType.R_PAREN) 7785 7786 self._match_r_paren() 7787 7788 return self.expression( 7789 exp.DictProperty, 7790 this=this, 7791 kind=kind.this if kind else None, 7792 settings=settings, 7793 ) 7794 7795 def _parse_dict_range(self, this: str) -> exp.DictRange: 7796 self._match_l_paren() 7797 has_min = self._match_text_seq("MIN") 7798 if has_min: 7799 min = self._parse_var() or self._parse_primary() 7800 self._match_text_seq("MAX") 7801 max = self._parse_var() or self._parse_primary() 7802 else: 7803 max = self._parse_var() or self._parse_primary() 7804 min = exp.Literal.number(0) 7805 self._match_r_paren() 7806 return self.expression(exp.DictRange, this=this, min=min, max=max) 7807 7808 def _parse_comprehension( 7809 self, this: t.Optional[exp.Expression] 7810 ) -> t.Optional[exp.Comprehension]: 7811 index = self._index 7812 expression = self._parse_column() 7813 if not self._match(TokenType.IN): 7814 self._retreat(index - 1) 7815 return None 7816 iterator = self._parse_column() 7817 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7818 return self.expression( 7819 exp.Comprehension, 7820 this=this, 7821 expression=expression, 7822 iterator=iterator, 7823 condition=condition, 7824 ) 7825 7826 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7827 if self._match(TokenType.HEREDOC_STRING): 7828 return self.expression(exp.Heredoc, this=self._prev.text) 7829 7830 if not self._match_text_seq("$"): 7831 return None 7832 7833 tags = ["$"] 7834 tag_text = None 7835 7836 if self._is_connected(): 7837 self._advance() 7838 tags.append(self._prev.text.upper()) 7839 else: 7840 self.raise_error("No closing $ found") 7841 7842 if tags[-1] != "$": 7843 if self._is_connected() and self._match_text_seq("$"): 7844 tag_text = tags[-1] 7845 tags.append("$") 7846 else: 7847 self.raise_error("No closing $ found") 7848 7849 heredoc_start = self._curr 7850 7851 while self._curr: 7852 if self._match_text_seq(*tags, advance=False): 7853 this = self._find_sql(heredoc_start, self._prev) 7854 self._advance(len(tags)) 7855 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7856 7857 self._advance() 7858 7859 self.raise_error(f"No closing {''.join(tags)} found") 7860 return None 7861 7862 def _find_parser( 7863 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7864 ) -> t.Optional[t.Callable]: 7865 if not self._curr: 7866 return None 7867 7868 index = self._index 7869 this = [] 7870 while True: 7871 # The current token might be multiple words 7872 curr = self._curr.text.upper() 7873 key = curr.split(" ") 7874 this.append(curr) 7875 7876 self._advance() 7877 result, trie = in_trie(trie, key) 7878 if result == TrieResult.FAILED: 7879 break 7880 7881 if result == TrieResult.EXISTS: 7882 subparser = parsers[" ".join(this)] 7883 return subparser 7884 7885 self._retreat(index) 7886 return None 7887 7888 def _match(self, token_type, advance=True, expression=None): 7889 if not self._curr: 7890 return None 7891 7892 if self._curr.token_type == token_type: 7893 if advance: 7894 self._advance() 7895 self._add_comments(expression) 7896 return 
True 7897 7898 return None 7899 7900 def _match_set(self, types, advance=True): 7901 if not self._curr: 7902 return None 7903 7904 if self._curr.token_type in types: 7905 if advance: 7906 self._advance() 7907 return True 7908 7909 return None 7910 7911 def _match_pair(self, token_type_a, token_type_b, advance=True): 7912 if not self._curr or not self._next: 7913 return None 7914 7915 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7916 if advance: 7917 self._advance(2) 7918 return True 7919 7920 return None 7921 7922 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7923 if not self._match(TokenType.L_PAREN, expression=expression): 7924 self.raise_error("Expecting (") 7925 7926 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7927 if not self._match(TokenType.R_PAREN, expression=expression): 7928 self.raise_error("Expecting )") 7929 7930 def _match_texts(self, texts, advance=True): 7931 if ( 7932 self._curr 7933 and self._curr.token_type != TokenType.STRING 7934 and self._curr.text.upper() in texts 7935 ): 7936 if advance: 7937 self._advance() 7938 return True 7939 return None 7940 7941 def _match_text_seq(self, *texts, advance=True): 7942 index = self._index 7943 for text in texts: 7944 if ( 7945 self._curr 7946 and self._curr.token_type != TokenType.STRING 7947 and self._curr.text.upper() == text 7948 ): 7949 self._advance() 7950 else: 7951 self._retreat(index) 7952 return None 7953 7954 if not advance: 7955 self._retreat(index) 7956 7957 return True 7958 7959 def _replace_lambda( 7960 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7961 ) -> t.Optional[exp.Expression]: 7962 if not node: 7963 return node 7964 7965 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7966 7967 for column in node.find_all(exp.Column): 7968 typ = lambda_types.get(column.parts[0].name) 7969 if typ is not None: 7970 dot_or_id = column.to_dot() if column.table else column.this 7971 7972 if typ: 7973 dot_or_id = self.expression( 7974 exp.Cast, 7975 this=dot_or_id, 7976 to=typ, 7977 ) 7978 7979 parent = column.parent 7980 7981 while isinstance(parent, exp.Dot): 7982 if not isinstance(parent.parent, exp.Dot): 7983 parent.replace(dot_or_id) 7984 break 7985 parent = parent.parent 7986 else: 7987 if column is node: 7988 node = dot_or_id 7989 else: 7990 column.replace(dot_or_id) 7991 return node 7992 7993 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7994 start = self._prev 7995 7996 # Not to be confused with TRUNCATE(number, decimals) function call 7997 if self._match(TokenType.L_PAREN): 7998 self._retreat(self._index - 2) 7999 return self._parse_function() 8000 8001 # Clickhouse supports TRUNCATE DATABASE as well 8002 is_database = self._match(TokenType.DATABASE) 8003 8004 self._match(TokenType.TABLE) 8005 8006 exists = self._parse_exists(not_=False) 8007 8008 expressions = self._parse_csv( 8009 lambda: self._parse_table(schema=True, is_db_reference=is_database) 8010 ) 8011 8012 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8013 8014 if self._match_text_seq("RESTART", "IDENTITY"): 8015 identity = "RESTART" 8016 elif self._match_text_seq("CONTINUE", "IDENTITY"): 8017 identity = "CONTINUE" 8018 else: 8019 identity = None 8020 8021 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8022 option = self._prev.text 8023 else: 8024 option = None 8025 8026 partition = self._parse_partition() 
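# As with _parse_alter above and _parse_copy below, any tokens still unconsumed at
# this point indicate dialect-specific syntax we do not model, so the whole statement
# is preserved verbatim as a generic exp.Command.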
8027 8028 # Fallback case 8029 if self._curr: 8030 return self._parse_as_command(start) 8031 8032 return self.expression( 8033 exp.TruncateTable, 8034 expressions=expressions, 8035 is_database=is_database, 8036 exists=exists, 8037 cluster=cluster, 8038 identity=identity, 8039 option=option, 8040 partition=partition, 8041 ) 8042 8043 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8044 this = self._parse_ordered(self._parse_opclass) 8045 8046 if not self._match(TokenType.WITH): 8047 return this 8048 8049 op = self._parse_var(any_token=True) 8050 8051 return self.expression(exp.WithOperator, this=this, op=op) 8052 8053 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8054 self._match(TokenType.EQ) 8055 self._match(TokenType.L_PAREN) 8056 8057 opts: t.List[t.Optional[exp.Expression]] = [] 8058 option: exp.Expression | None 8059 while self._curr and not self._match(TokenType.R_PAREN): 8060 if self._match_text_seq("FORMAT_NAME", "="): 8061 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8062 option = self._parse_format_name() 8063 else: 8064 option = self._parse_property() 8065 8066 if option is None: 8067 self.raise_error("Unable to parse option") 8068 break 8069 8070 opts.append(option) 8071 8072 return opts 8073 8074 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8075 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8076 8077 options = [] 8078 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8079 option = self._parse_var(any_token=True) 8080 prev = self._prev.text.upper() 8081 8082 # Different dialects might separate options and values by white space, "=" and "AS" 8083 self._match(TokenType.EQ) 8084 self._match(TokenType.ALIAS) 8085 8086 param = self.expression(exp.CopyParameter, this=option) 8087 8088 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8089 TokenType.L_PAREN, advance=False 8090 ): 8091 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8092 param.set("expressions", self._parse_wrapped_options()) 8093 elif prev == "FILE_FORMAT": 8094 # T-SQL's external file format case 8095 param.set("expression", self._parse_field()) 8096 else: 8097 param.set("expression", self._parse_unquoted_field()) 8098 8099 options.append(param) 8100 self._match(sep) 8101 8102 return options 8103 8104 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8105 expr = self.expression(exp.Credentials) 8106 8107 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8108 expr.set("storage", self._parse_field()) 8109 if self._match_text_seq("CREDENTIALS"): 8110 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8111 creds = ( 8112 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8113 ) 8114 expr.set("credentials", creds) 8115 if self._match_text_seq("ENCRYPTION"): 8116 expr.set("encryption", self._parse_wrapped_options()) 8117 if self._match_text_seq("IAM_ROLE"): 8118 expr.set("iam_role", self._parse_field()) 8119 if self._match_text_seq("REGION"): 8120 expr.set("region", self._parse_field()) 8121 8122 return expr 8123 8124 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8125 return self._parse_field() 8126 8127 def _parse_copy(self) -> exp.Copy | exp.Command: 8128 start = self._prev 8129 8130 self._match(TokenType.INTO) 8131 8132 this = ( 8133 self._parse_select(nested=True, parse_subquery_alias=False) 8134 if self._match(TokenType.L_PAREN, advance=False) 8135 else self._parse_table(schema=True) 
8136 ) 8137 8138 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8139 8140 files = self._parse_csv(self._parse_file_location) 8141 credentials = self._parse_credentials() 8142 8143 self._match_text_seq("WITH") 8144 8145 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8146 8147 # Fallback case 8148 if self._curr: 8149 return self._parse_as_command(start) 8150 8151 return self.expression( 8152 exp.Copy, 8153 this=this, 8154 kind=kind, 8155 credentials=credentials, 8156 files=files, 8157 params=params, 8158 ) 8159 8160 def _parse_normalize(self) -> exp.Normalize: 8161 return self.expression( 8162 exp.Normalize, 8163 this=self._parse_bitwise(), 8164 form=self._match(TokenType.COMMA) and self._parse_var(), 8165 ) 8166 8167 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8168 args = self._parse_csv(lambda: self._parse_lambda()) 8169 8170 this = seq_get(args, 0) 8171 decimals = seq_get(args, 1) 8172 8173 return expr_type( 8174 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8175 ) 8176 8177 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8178 if self._match_text_seq("COLUMNS", "(", advance=False): 8179 this = self._parse_function() 8180 if isinstance(this, exp.Columns): 8181 this.set("unpack", True) 8182 return this 8183 8184 return self.expression( 8185 exp.Star, 8186 **{ # type: ignore 8187 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8188 "replace": self._parse_star_op("REPLACE"), 8189 "rename": self._parse_star_op("RENAME"), 8190 }, 8191 ) 8192 8193 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8194 privilege_parts = [] 8195 8196 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8197 # (end of privilege list) or L_PAREN (start of column list) are met 8198 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8199 privilege_parts.append(self._curr.text.upper()) 8200 self._advance() 8201 8202 this = exp.var(" ".join(privilege_parts)) 8203 expressions = ( 8204 self._parse_wrapped_csv(self._parse_column) 8205 if self._match(TokenType.L_PAREN, advance=False) 8206 else None 8207 ) 8208 8209 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8210 8211 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8212 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8213 principal = self._parse_id_var() 8214 8215 if not principal: 8216 return None 8217 8218 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8219 8220 def _parse_grant(self) -> exp.Grant | exp.Command: 8221 start = self._prev 8222 8223 privileges = self._parse_csv(self._parse_grant_privilege) 8224 8225 self._match(TokenType.ON) 8226 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8227 8228 # Attempt to parse the securable e.g. 
MySQL allows names 8229 # such as "foo.*", "*.*" which are not easily parseable yet 8230 securable = self._try_parse(self._parse_table_parts) 8231 8232 if not securable or not self._match_text_seq("TO"): 8233 return self._parse_as_command(start) 8234 8235 principals = self._parse_csv(self._parse_grant_principal) 8236 8237 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8238 8239 if self._curr: 8240 return self._parse_as_command(start) 8241 8242 return self.expression( 8243 exp.Grant, 8244 privileges=privileges, 8245 kind=kind, 8246 securable=securable, 8247 principals=principals, 8248 grant_option=grant_option, 8249 ) 8250 8251 def _parse_overlay(self) -> exp.Overlay: 8252 return self.expression( 8253 exp.Overlay, 8254 **{ # type: ignore 8255 "this": self._parse_bitwise(), 8256 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8257 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8258 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8259 }, 8260 ) 8261 8262 def _parse_format_name(self) -> exp.Property: 8263 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8264 # for FILE_FORMAT = <format_name> 8265 return self.expression( 8266 exp.Property, 8267 this=exp.var("FORMAT_NAME"), 8268 value=self._parse_string() or self._parse_table_parts(), 8269 ) 8270 8271 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8272 args: t.List[exp.Expression] = [] 8273 8274 if self._match(TokenType.DISTINCT): 8275 args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()])) 8276 self._match(TokenType.COMMA) 8277 8278 args.extend(self._parse_csv(self._parse_assignment)) 8279 8280 return self.expression( 8281 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8282 ) 8283 8284 def _identifier_expression( 8285 self, token: t.Optional[Token] = None, **kwargs: t.Any 8286 ) -> exp.Identifier: 8287 token = token or self._prev 8288 expression = self.expression(exp.Identifier, this=token.text, **kwargs) 8289 expression.update_positions(token) 8290 return expression
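The statement parsers above (windows, MERGE, TRUNCATE, COPY, GRANT and friends) are reached through sqlglot's public entry points rather than called directly. A minimal sketch of how a few of them surface, assuming a standard sqlglot installation (exact AST reprs may vary between versions):

import sqlglot
from sqlglot import exp

# _parse_window: an OVER clause with a frame becomes exp.Window carrying a WindowSpec
ast = sqlglot.parse_one(
    "SELECT SUM(x) OVER (PARTITION BY y ORDER BY z "
    "ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) FROM t"
)
print(ast.find(exp.Window).args["spec"])  # kind=ROWS, sides filled by _parse_window_spec

# _parse_merge/_parse_when_matched: WHEN branches are collected into an exp.Whens node
merge = sqlglot.parse_one(
    "MERGE INTO t USING s ON t.id = s.id WHEN MATCHED THEN UPDATE SET t.v = s.v"
)
print([w.args["matched"] for w in merge.find_all(exp.When)])  # [True]

# _parse_truncate_table: identity/option modifiers land on exp.TruncateTable
trunc = sqlglot.parse_one("TRUNCATE TABLE a, b RESTART IDENTITY CASCADE", read="postgres")
print(trunc.args["identity"], trunc.args["option"])  # RESTART CASCADE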
103def build_mod(args: t.List) -> exp.Mod: 104 this = seq_get(args, 0) 105 expression = seq_get(args, 1) 106 107 # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7 108 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 109 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 110 111 return exp.Mod(this=this, expression=expression)
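The parenthesization matters when MOD is transpiled to the % operator; without it, MOD(a + 1, 7) would render with the wrong precedence. A quick check (expected output shown as a comment, under current transpilation rules):

import sqlglot

# build_mod wraps the binary operand, so precedence survives the rewrite
print(sqlglot.transpile("SELECT MOD(a + 1, 7)", read="mysql", write="duckdb")[0])
# expected: SELECT (a + 1) % 7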
123def build_array_constructor( 124 exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect 125) -> exp.Expression: 126 array_exp = exp_class(expressions=args) 127 128 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 129 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 130 131 return array_exp
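A short usage sketch: bracket and keyword constructors both produce the same node type, and bracket_notation is only recorded for dialects that treat ARRAY(...) and [...] as distinct constructors (the default rendering shown in the comment is an assumption and may differ by version):

import sqlglot
from sqlglot import exp

ast = sqlglot.parse_one("SELECT ARRAY[1, 2]", read="postgres")
print(ast.find(exp.Array).sql())  # typically ARRAY(1, 2) in the default dialect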
134def build_convert_timezone( 135 args: t.List, default_source_tz: t.Optional[str] = None 136) -> t.Union[exp.ConvertTimezone, exp.Anonymous]: 137 if len(args) == 2: 138 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 139 return exp.ConvertTimezone( 140 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 141 ) 142 143 return exp.ConvertTimezone.from_arg_list(args)
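The two-argument form omits the source time zone, which a dialect can backfill via default_source_tz. Since the generic FUNCTIONS table below maps CONVERT_TIMEZONE to this builder, a plain parse demonstrates it:

import sqlglot
from sqlglot import exp

node = sqlglot.parse_one(
    "SELECT CONVERT_TIMEZONE('America/New_York', ts)"
).find(exp.ConvertTimezone)
print(node.args.get("source_tz"), node.args["target_tz"])  # source_tz unset; target_tz holds the literal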
178class Parser(metaclass=_Parser): 179 """ 180 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 181 182 Args: 183 error_level: The desired error level. 184 Default: ErrorLevel.IMMEDIATE 185 error_message_context: The amount of context to capture from a query string when displaying 186 the error message (in number of characters). 187 Default: 100 188 max_errors: Maximum number of error messages to include in a raised ParseError. 189 This is only relevant if error_level is ErrorLevel.RAISE. 190 Default: 3 191 """ 192 193 FUNCTIONS: t.Dict[str, t.Callable] = { 194 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 195 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 196 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 197 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 198 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 199 ), 200 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 201 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 202 ), 203 "CHAR": lambda args: exp.Chr(expressions=args), 204 "CHR": lambda args: exp.Chr(expressions=args), 205 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 206 "CONCAT": lambda args, dialect: exp.Concat( 207 expressions=args, 208 safe=not dialect.STRICT_STRING_CONCAT, 209 coalesce=dialect.CONCAT_COALESCE, 210 ), 211 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 212 expressions=args, 213 safe=not dialect.STRICT_STRING_CONCAT, 214 coalesce=dialect.CONCAT_COALESCE, 215 ), 216 "CONVERT_TIMEZONE": build_convert_timezone, 217 "DATE_TO_DATE_STR": lambda args: exp.Cast( 218 this=seq_get(args, 0), 219 to=exp.DataType(this=exp.DataType.Type.TEXT), 220 ), 221 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 222 start=seq_get(args, 0), 223 end=seq_get(args, 1), 224 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 225 ), 226 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 227 "HEX": build_hex, 228 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 229 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 230 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 231 "LIKE": build_like, 232 "LOG": build_logarithm, 233 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 234 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 235 "LOWER": build_lower, 236 "LPAD": lambda args: build_pad(args), 237 "LEFTPAD": lambda args: build_pad(args), 238 "LTRIM": lambda args: build_trim(args), 239 "MOD": build_mod, 240 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 241 "RPAD": lambda args: build_pad(args, is_left=False), 242 "RTRIM": lambda args: build_trim(args, is_left=False), 243 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 244 if len(args) != 2 245 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 246 "STRPOS": exp.StrPosition.from_arg_list, 247 "CHARINDEX": lambda args: build_locate_strposition(args), 248 "INSTR": exp.StrPosition.from_arg_list, 249 "LOCATE": lambda args: build_locate_strposition(args), 250 "TIME_TO_TIME_STR": lambda args: exp.Cast( 251 this=seq_get(args, 0), 252 to=exp.DataType(this=exp.DataType.Type.TEXT), 253 ), 254 "TO_HEX": build_hex, 255 "TS_OR_DS_TO_DATE_STR": 
lambda args: exp.Substring( 256 this=exp.Cast( 257 this=seq_get(args, 0), 258 to=exp.DataType(this=exp.DataType.Type.TEXT), 259 ), 260 start=exp.Literal.number(1), 261 length=exp.Literal.number(10), 262 ), 263 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 264 "UPPER": build_upper, 265 "VAR_MAP": build_var_map, 266 } 267 268 NO_PAREN_FUNCTIONS = { 269 TokenType.CURRENT_DATE: exp.CurrentDate, 270 TokenType.CURRENT_DATETIME: exp.CurrentDate, 271 TokenType.CURRENT_TIME: exp.CurrentTime, 272 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 273 TokenType.CURRENT_USER: exp.CurrentUser, 274 } 275 276 STRUCT_TYPE_TOKENS = { 277 TokenType.NESTED, 278 TokenType.OBJECT, 279 TokenType.STRUCT, 280 TokenType.UNION, 281 } 282 283 NESTED_TYPE_TOKENS = { 284 TokenType.ARRAY, 285 TokenType.LIST, 286 TokenType.LOWCARDINALITY, 287 TokenType.MAP, 288 TokenType.NULLABLE, 289 TokenType.RANGE, 290 *STRUCT_TYPE_TOKENS, 291 } 292 293 ENUM_TYPE_TOKENS = { 294 TokenType.DYNAMIC, 295 TokenType.ENUM, 296 TokenType.ENUM8, 297 TokenType.ENUM16, 298 } 299 300 AGGREGATE_TYPE_TOKENS = { 301 TokenType.AGGREGATEFUNCTION, 302 TokenType.SIMPLEAGGREGATEFUNCTION, 303 } 304 305 TYPE_TOKENS = { 306 TokenType.BIT, 307 TokenType.BOOLEAN, 308 TokenType.TINYINT, 309 TokenType.UTINYINT, 310 TokenType.SMALLINT, 311 TokenType.USMALLINT, 312 TokenType.INT, 313 TokenType.UINT, 314 TokenType.BIGINT, 315 TokenType.UBIGINT, 316 TokenType.INT128, 317 TokenType.UINT128, 318 TokenType.INT256, 319 TokenType.UINT256, 320 TokenType.MEDIUMINT, 321 TokenType.UMEDIUMINT, 322 TokenType.FIXEDSTRING, 323 TokenType.FLOAT, 324 TokenType.DOUBLE, 325 TokenType.UDOUBLE, 326 TokenType.CHAR, 327 TokenType.NCHAR, 328 TokenType.VARCHAR, 329 TokenType.NVARCHAR, 330 TokenType.BPCHAR, 331 TokenType.TEXT, 332 TokenType.MEDIUMTEXT, 333 TokenType.LONGTEXT, 334 TokenType.BLOB, 335 TokenType.MEDIUMBLOB, 336 TokenType.LONGBLOB, 337 TokenType.BINARY, 338 TokenType.VARBINARY, 339 TokenType.JSON, 340 TokenType.JSONB, 341 TokenType.INTERVAL, 342 TokenType.TINYBLOB, 343 TokenType.TINYTEXT, 344 TokenType.TIME, 345 TokenType.TIMETZ, 346 TokenType.TIMESTAMP, 347 TokenType.TIMESTAMP_S, 348 TokenType.TIMESTAMP_MS, 349 TokenType.TIMESTAMP_NS, 350 TokenType.TIMESTAMPTZ, 351 TokenType.TIMESTAMPLTZ, 352 TokenType.TIMESTAMPNTZ, 353 TokenType.DATETIME, 354 TokenType.DATETIME2, 355 TokenType.DATETIME64, 356 TokenType.SMALLDATETIME, 357 TokenType.DATE, 358 TokenType.DATE32, 359 TokenType.INT4RANGE, 360 TokenType.INT4MULTIRANGE, 361 TokenType.INT8RANGE, 362 TokenType.INT8MULTIRANGE, 363 TokenType.NUMRANGE, 364 TokenType.NUMMULTIRANGE, 365 TokenType.TSRANGE, 366 TokenType.TSMULTIRANGE, 367 TokenType.TSTZRANGE, 368 TokenType.TSTZMULTIRANGE, 369 TokenType.DATERANGE, 370 TokenType.DATEMULTIRANGE, 371 TokenType.DECIMAL, 372 TokenType.DECIMAL32, 373 TokenType.DECIMAL64, 374 TokenType.DECIMAL128, 375 TokenType.DECIMAL256, 376 TokenType.UDECIMAL, 377 TokenType.BIGDECIMAL, 378 TokenType.UUID, 379 TokenType.GEOGRAPHY, 380 TokenType.GEOMETRY, 381 TokenType.POINT, 382 TokenType.RING, 383 TokenType.LINESTRING, 384 TokenType.MULTILINESTRING, 385 TokenType.POLYGON, 386 TokenType.MULTIPOLYGON, 387 TokenType.HLLSKETCH, 388 TokenType.HSTORE, 389 TokenType.PSEUDO_TYPE, 390 TokenType.SUPER, 391 TokenType.SERIAL, 392 TokenType.SMALLSERIAL, 393 TokenType.BIGSERIAL, 394 TokenType.XML, 395 TokenType.YEAR, 396 TokenType.USERDEFINED, 397 TokenType.MONEY, 398 TokenType.SMALLMONEY, 399 TokenType.ROWVERSION, 400 TokenType.IMAGE, 401 TokenType.VARIANT, 402 TokenType.VECTOR, 403 
TokenType.VOID, 404 TokenType.OBJECT, 405 TokenType.OBJECT_IDENTIFIER, 406 TokenType.INET, 407 TokenType.IPADDRESS, 408 TokenType.IPPREFIX, 409 TokenType.IPV4, 410 TokenType.IPV6, 411 TokenType.UNKNOWN, 412 TokenType.NOTHING, 413 TokenType.NULL, 414 TokenType.NAME, 415 TokenType.TDIGEST, 416 TokenType.DYNAMIC, 417 *ENUM_TYPE_TOKENS, 418 *NESTED_TYPE_TOKENS, 419 *AGGREGATE_TYPE_TOKENS, 420 } 421 422 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 423 TokenType.BIGINT: TokenType.UBIGINT, 424 TokenType.INT: TokenType.UINT, 425 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 426 TokenType.SMALLINT: TokenType.USMALLINT, 427 TokenType.TINYINT: TokenType.UTINYINT, 428 TokenType.DECIMAL: TokenType.UDECIMAL, 429 TokenType.DOUBLE: TokenType.UDOUBLE, 430 } 431 432 SUBQUERY_PREDICATES = { 433 TokenType.ANY: exp.Any, 434 TokenType.ALL: exp.All, 435 TokenType.EXISTS: exp.Exists, 436 TokenType.SOME: exp.Any, 437 } 438 439 RESERVED_TOKENS = { 440 *Tokenizer.SINGLE_TOKENS.values(), 441 TokenType.SELECT, 442 } - {TokenType.IDENTIFIER} 443 444 DB_CREATABLES = { 445 TokenType.DATABASE, 446 TokenType.DICTIONARY, 447 TokenType.FILE_FORMAT, 448 TokenType.MODEL, 449 TokenType.NAMESPACE, 450 TokenType.SCHEMA, 451 TokenType.SEQUENCE, 452 TokenType.SINK, 453 TokenType.SOURCE, 454 TokenType.STAGE, 455 TokenType.STORAGE_INTEGRATION, 456 TokenType.STREAMLIT, 457 TokenType.TABLE, 458 TokenType.TAG, 459 TokenType.VIEW, 460 TokenType.WAREHOUSE, 461 } 462 463 CREATABLES = { 464 TokenType.COLUMN, 465 TokenType.CONSTRAINT, 466 TokenType.FOREIGN_KEY, 467 TokenType.FUNCTION, 468 TokenType.INDEX, 469 TokenType.PROCEDURE, 470 *DB_CREATABLES, 471 } 472 473 ALTERABLES = { 474 TokenType.INDEX, 475 TokenType.TABLE, 476 TokenType.VIEW, 477 } 478 479 # Tokens that can represent identifiers 480 ID_VAR_TOKENS = { 481 TokenType.ALL, 482 TokenType.ATTACH, 483 TokenType.VAR, 484 TokenType.ANTI, 485 TokenType.APPLY, 486 TokenType.ASC, 487 TokenType.ASOF, 488 TokenType.AUTO_INCREMENT, 489 TokenType.BEGIN, 490 TokenType.BPCHAR, 491 TokenType.CACHE, 492 TokenType.CASE, 493 TokenType.COLLATE, 494 TokenType.COMMAND, 495 TokenType.COMMENT, 496 TokenType.COMMIT, 497 TokenType.CONSTRAINT, 498 TokenType.COPY, 499 TokenType.CUBE, 500 TokenType.CURRENT_SCHEMA, 501 TokenType.DEFAULT, 502 TokenType.DELETE, 503 TokenType.DESC, 504 TokenType.DESCRIBE, 505 TokenType.DETACH, 506 TokenType.DICTIONARY, 507 TokenType.DIV, 508 TokenType.END, 509 TokenType.EXECUTE, 510 TokenType.EXPORT, 511 TokenType.ESCAPE, 512 TokenType.FALSE, 513 TokenType.FIRST, 514 TokenType.FILTER, 515 TokenType.FINAL, 516 TokenType.FORMAT, 517 TokenType.FULL, 518 TokenType.GET, 519 TokenType.IDENTIFIER, 520 TokenType.IS, 521 TokenType.ISNULL, 522 TokenType.INTERVAL, 523 TokenType.KEEP, 524 TokenType.KILL, 525 TokenType.LEFT, 526 TokenType.LIMIT, 527 TokenType.LOAD, 528 TokenType.MERGE, 529 TokenType.NATURAL, 530 TokenType.NEXT, 531 TokenType.OFFSET, 532 TokenType.OPERATOR, 533 TokenType.ORDINALITY, 534 TokenType.OVERLAPS, 535 TokenType.OVERWRITE, 536 TokenType.PARTITION, 537 TokenType.PERCENT, 538 TokenType.PIVOT, 539 TokenType.PRAGMA, 540 TokenType.PUT, 541 TokenType.RANGE, 542 TokenType.RECURSIVE, 543 TokenType.REFERENCES, 544 TokenType.REFRESH, 545 TokenType.RENAME, 546 TokenType.REPLACE, 547 TokenType.RIGHT, 548 TokenType.ROLLUP, 549 TokenType.ROW, 550 TokenType.ROWS, 551 TokenType.SEMI, 552 TokenType.SET, 553 TokenType.SETTINGS, 554 TokenType.SHOW, 555 TokenType.TEMPORARY, 556 TokenType.TOP, 557 TokenType.TRUE, 558 TokenType.TRUNCATE, 559 TokenType.UNIQUE, 560 TokenType.UNNEST, 561 TokenType.UNPIVOT, 
562 TokenType.UPDATE, 563 TokenType.USE, 564 TokenType.VOLATILE, 565 TokenType.WINDOW, 566 *CREATABLES, 567 *SUBQUERY_PREDICATES, 568 *TYPE_TOKENS, 569 *NO_PAREN_FUNCTIONS, 570 } 571 ID_VAR_TOKENS.remove(TokenType.UNION) 572 573 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 574 TokenType.ANTI, 575 TokenType.APPLY, 576 TokenType.ASOF, 577 TokenType.FULL, 578 TokenType.LEFT, 579 TokenType.LOCK, 580 TokenType.NATURAL, 581 TokenType.RIGHT, 582 TokenType.SEMI, 583 TokenType.WINDOW, 584 } 585 586 ALIAS_TOKENS = ID_VAR_TOKENS 587 588 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS 589 590 ARRAY_CONSTRUCTORS = { 591 "ARRAY": exp.Array, 592 "LIST": exp.List, 593 } 594 595 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 596 597 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 598 599 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 600 601 FUNC_TOKENS = { 602 TokenType.COLLATE, 603 TokenType.COMMAND, 604 TokenType.CURRENT_DATE, 605 TokenType.CURRENT_DATETIME, 606 TokenType.CURRENT_SCHEMA, 607 TokenType.CURRENT_TIMESTAMP, 608 TokenType.CURRENT_TIME, 609 TokenType.CURRENT_USER, 610 TokenType.FILTER, 611 TokenType.FIRST, 612 TokenType.FORMAT, 613 TokenType.GET, 614 TokenType.GLOB, 615 TokenType.IDENTIFIER, 616 TokenType.INDEX, 617 TokenType.ISNULL, 618 TokenType.ILIKE, 619 TokenType.INSERT, 620 TokenType.LIKE, 621 TokenType.MERGE, 622 TokenType.NEXT, 623 TokenType.OFFSET, 624 TokenType.PRIMARY_KEY, 625 TokenType.RANGE, 626 TokenType.REPLACE, 627 TokenType.RLIKE, 628 TokenType.ROW, 629 TokenType.UNNEST, 630 TokenType.VAR, 631 TokenType.LEFT, 632 TokenType.RIGHT, 633 TokenType.SEQUENCE, 634 TokenType.DATE, 635 TokenType.DATETIME, 636 TokenType.TABLE, 637 TokenType.TIMESTAMP, 638 TokenType.TIMESTAMPTZ, 639 TokenType.TRUNCATE, 640 TokenType.WINDOW, 641 TokenType.XOR, 642 *TYPE_TOKENS, 643 *SUBQUERY_PREDICATES, 644 } 645 646 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 647 TokenType.AND: exp.And, 648 } 649 650 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 651 TokenType.COLON_EQ: exp.PropertyEQ, 652 } 653 654 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 655 TokenType.OR: exp.Or, 656 } 657 658 EQUALITY = { 659 TokenType.EQ: exp.EQ, 660 TokenType.NEQ: exp.NEQ, 661 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 662 } 663 664 COMPARISON = { 665 TokenType.GT: exp.GT, 666 TokenType.GTE: exp.GTE, 667 TokenType.LT: exp.LT, 668 TokenType.LTE: exp.LTE, 669 } 670 671 BITWISE = { 672 TokenType.AMP: exp.BitwiseAnd, 673 TokenType.CARET: exp.BitwiseXor, 674 TokenType.PIPE: exp.BitwiseOr, 675 } 676 677 TERM = { 678 TokenType.DASH: exp.Sub, 679 TokenType.PLUS: exp.Add, 680 TokenType.MOD: exp.Mod, 681 TokenType.COLLATE: exp.Collate, 682 } 683 684 FACTOR = { 685 TokenType.DIV: exp.IntDiv, 686 TokenType.LR_ARROW: exp.Distance, 687 TokenType.SLASH: exp.Div, 688 TokenType.STAR: exp.Mul, 689 } 690 691 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 692 693 TIMES = { 694 TokenType.TIME, 695 TokenType.TIMETZ, 696 } 697 698 TIMESTAMPS = { 699 TokenType.TIMESTAMP, 700 TokenType.TIMESTAMPNTZ, 701 TokenType.TIMESTAMPTZ, 702 TokenType.TIMESTAMPLTZ, 703 *TIMES, 704 } 705 706 SET_OPERATIONS = { 707 TokenType.UNION, 708 TokenType.INTERSECT, 709 TokenType.EXCEPT, 710 } 711 712 JOIN_METHODS = { 713 TokenType.ASOF, 714 TokenType.NATURAL, 715 TokenType.POSITIONAL, 716 } 717 718 JOIN_SIDES = { 719 TokenType.LEFT, 720 TokenType.RIGHT, 721 TokenType.FULL, 722 } 723 724 JOIN_KINDS = { 725 TokenType.ANTI, 726 TokenType.CROSS, 727 TokenType.INNER, 728 TokenType.OUTER, 729 TokenType.SEMI, 730 
TokenType.STRAIGHT_JOIN, 731 } 732 733 JOIN_HINTS: t.Set[str] = set() 734 735 LAMBDAS = { 736 TokenType.ARROW: lambda self, expressions: self.expression( 737 exp.Lambda, 738 this=self._replace_lambda( 739 self._parse_assignment(), 740 expressions, 741 ), 742 expressions=expressions, 743 ), 744 TokenType.FARROW: lambda self, expressions: self.expression( 745 exp.Kwarg, 746 this=exp.var(expressions[0].name), 747 expression=self._parse_assignment(), 748 ), 749 } 750 751 COLUMN_OPERATORS = { 752 TokenType.DOT: None, 753 TokenType.DOTCOLON: lambda self, this, to: self.expression( 754 exp.JSONCast, 755 this=this, 756 to=to, 757 ), 758 TokenType.DCOLON: lambda self, this, to: self.expression( 759 exp.Cast if self.STRICT_CAST else exp.TryCast, 760 this=this, 761 to=to, 762 ), 763 TokenType.ARROW: lambda self, this, path: self.expression( 764 exp.JSONExtract, 765 this=this, 766 expression=self.dialect.to_json_path(path), 767 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 768 ), 769 TokenType.DARROW: lambda self, this, path: self.expression( 770 exp.JSONExtractScalar, 771 this=this, 772 expression=self.dialect.to_json_path(path), 773 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 774 ), 775 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 776 exp.JSONBExtract, 777 this=this, 778 expression=path, 779 ), 780 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 781 exp.JSONBExtractScalar, 782 this=this, 783 expression=path, 784 ), 785 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 786 exp.JSONBContains, 787 this=this, 788 expression=key, 789 ), 790 } 791 792 EXPRESSION_PARSERS = { 793 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 794 exp.Column: lambda self: self._parse_column(), 795 exp.Condition: lambda self: self._parse_assignment(), 796 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 797 exp.Expression: lambda self: self._parse_expression(), 798 exp.From: lambda self: self._parse_from(joins=True), 799 exp.Group: lambda self: self._parse_group(), 800 exp.Having: lambda self: self._parse_having(), 801 exp.Hint: lambda self: self._parse_hint_body(), 802 exp.Identifier: lambda self: self._parse_id_var(), 803 exp.Join: lambda self: self._parse_join(), 804 exp.Lambda: lambda self: self._parse_lambda(), 805 exp.Lateral: lambda self: self._parse_lateral(), 806 exp.Limit: lambda self: self._parse_limit(), 807 exp.Offset: lambda self: self._parse_offset(), 808 exp.Order: lambda self: self._parse_order(), 809 exp.Ordered: lambda self: self._parse_ordered(), 810 exp.Properties: lambda self: self._parse_properties(), 811 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 812 exp.Qualify: lambda self: self._parse_qualify(), 813 exp.Returning: lambda self: self._parse_returning(), 814 exp.Select: lambda self: self._parse_select(), 815 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 816 exp.Table: lambda self: self._parse_table_parts(), 817 exp.TableAlias: lambda self: self._parse_table_alias(), 818 exp.Tuple: lambda self: self._parse_value(values=False), 819 exp.Whens: lambda self: self._parse_when_matched(), 820 exp.Where: lambda self: self._parse_where(), 821 exp.Window: lambda self: self._parse_named_window(), 822 exp.With: lambda self: self._parse_with(), 823 "JOIN_TYPE": lambda self: self._parse_join_parts(), 824 } 825 826 STATEMENT_PARSERS = { 827 TokenType.ALTER: lambda self: self._parse_alter(), 828 TokenType.ANALYZE: lambda self: 
self._parse_analyze(), 829 TokenType.BEGIN: lambda self: self._parse_transaction(), 830 TokenType.CACHE: lambda self: self._parse_cache(), 831 TokenType.COMMENT: lambda self: self._parse_comment(), 832 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 833 TokenType.COPY: lambda self: self._parse_copy(), 834 TokenType.CREATE: lambda self: self._parse_create(), 835 TokenType.DELETE: lambda self: self._parse_delete(), 836 TokenType.DESC: lambda self: self._parse_describe(), 837 TokenType.DESCRIBE: lambda self: self._parse_describe(), 838 TokenType.DROP: lambda self: self._parse_drop(), 839 TokenType.GRANT: lambda self: self._parse_grant(), 840 TokenType.INSERT: lambda self: self._parse_insert(), 841 TokenType.KILL: lambda self: self._parse_kill(), 842 TokenType.LOAD: lambda self: self._parse_load(), 843 TokenType.MERGE: lambda self: self._parse_merge(), 844 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 845 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 846 TokenType.REFRESH: lambda self: self._parse_refresh(), 847 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 848 TokenType.SET: lambda self: self._parse_set(), 849 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 850 TokenType.UNCACHE: lambda self: self._parse_uncache(), 851 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 852 TokenType.UPDATE: lambda self: self._parse_update(), 853 TokenType.USE: lambda self: self._parse_use(), 854 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 855 } 856 857 UNARY_PARSERS = { 858 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 859 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 860 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 861 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 862 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 863 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 864 } 865 866 STRING_PARSERS = { 867 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 868 exp.RawString, this=token.text 869 ), 870 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 871 exp.National, this=token.text 872 ), 873 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 874 TokenType.STRING: lambda self, token: self.expression( 875 exp.Literal, this=token.text, is_string=True 876 ), 877 TokenType.UNICODE_STRING: lambda self, token: self.expression( 878 exp.UnicodeString, 879 this=token.text, 880 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 881 ), 882 } 883 884 NUMERIC_PARSERS = { 885 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 886 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 887 TokenType.HEX_STRING: lambda self, token: self.expression( 888 exp.HexString, 889 this=token.text, 890 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 891 ), 892 TokenType.NUMBER: lambda self, token: self.expression( 893 exp.Literal, this=token.text, is_string=False 894 ), 895 } 896 897 PRIMARY_PARSERS = { 898 **STRING_PARSERS, 899 **NUMERIC_PARSERS, 900 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 901 TokenType.NULL: lambda self, _: self.expression(exp.Null), 902 
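# Illustrative sketch (uses the public sqlglot API; not part of this module): the literal
# parsers here and in STRING_PARSERS / NUMERIC_PARSERS map a single token to its AST node.
#
#     import sqlglot
#     from sqlglot import exp
#
#     ast = sqlglot.parse_one("SELECT NULL, TRUE, 'txt', 1.5")
#     # ast.expressions holds exp.Null, exp.Boolean(this=True),
#     # exp.Literal(is_string=True) and exp.Literal(is_string=False) nodes.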
TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 903 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 904 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 905 TokenType.STAR: lambda self, _: self._parse_star_ops(), 906 } 907 908 PLACEHOLDER_PARSERS = { 909 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 910 TokenType.PARAMETER: lambda self: self._parse_parameter(), 911 TokenType.COLON: lambda self: ( 912 self.expression(exp.Placeholder, this=self._prev.text) 913 if self._match_set(self.COLON_PLACEHOLDER_TOKENS) 914 else None 915 ), 916 } 917 918 RANGE_PARSERS = { 919 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 920 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 921 TokenType.GLOB: binary_range_parser(exp.Glob), 922 TokenType.ILIKE: binary_range_parser(exp.ILike), 923 TokenType.IN: lambda self, this: self._parse_in(this), 924 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 925 TokenType.IS: lambda self, this: self._parse_is(this), 926 TokenType.LIKE: binary_range_parser(exp.Like), 927 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 928 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 929 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 930 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 931 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 932 } 933 934 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 935 "ALLOWED_VALUES": lambda self: self.expression( 936 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 937 ), 938 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 939 "AUTO": lambda self: self._parse_auto_property(), 940 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 941 "BACKUP": lambda self: self.expression( 942 exp.BackupProperty, this=self._parse_var(any_token=True) 943 ), 944 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 945 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 946 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 947 "CHECKSUM": lambda self: self._parse_checksum(), 948 "CLUSTER BY": lambda self: self._parse_cluster(), 949 "CLUSTERED": lambda self: self._parse_clustered_by(), 950 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 951 exp.CollateProperty, **kwargs 952 ), 953 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 954 "CONTAINS": lambda self: self._parse_contains_property(), 955 "COPY": lambda self: self._parse_copy_property(), 956 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 957 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 958 "DEFINER": lambda self: self._parse_definer(), 959 "DETERMINISTIC": lambda self: self.expression( 960 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 961 ), 962 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 963 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 964 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 965 "DISTKEY": lambda self: self._parse_distkey(), 966 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 967 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 968 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 
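# Illustrative sketch (public sqlglot API; not part of this module): PROPERTY_PARSERS is keyed
# by the leading keyword(s) of a property, e.g. the "ENGINE" entry above yields an
# exp.EngineProperty when the MySQL dialect is used.
#
#     import sqlglot
#     from sqlglot import exp
#
#     ast = sqlglot.parse_one("CREATE TABLE t (x INT) ENGINE=InnoDB", read="mysql")
#     assert ast.find(exp.EngineProperty) is not None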
969 "ENVIRONMENT": lambda self: self.expression( 970 exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment) 971 ), 972 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 973 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 974 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 975 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 976 "FREESPACE": lambda self: self._parse_freespace(), 977 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 978 "HEAP": lambda self: self.expression(exp.HeapProperty), 979 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 980 "IMMUTABLE": lambda self: self.expression( 981 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 982 ), 983 "INHERITS": lambda self: self.expression( 984 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 985 ), 986 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 987 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 988 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 989 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 990 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 991 "LIKE": lambda self: self._parse_create_like(), 992 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 993 "LOCK": lambda self: self._parse_locking(), 994 "LOCKING": lambda self: self._parse_locking(), 995 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 996 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 997 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 998 "MODIFIES": lambda self: self._parse_modifies_property(), 999 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 1000 "NO": lambda self: self._parse_no_property(), 1001 "ON": lambda self: self._parse_on_property(), 1002 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 1003 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 1004 "PARTITION": lambda self: self._parse_partitioned_of(), 1005 "PARTITION BY": lambda self: self._parse_partitioned_by(), 1006 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 1007 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 1008 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 1009 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1010 "READS": lambda self: self._parse_reads_property(), 1011 "REMOTE": lambda self: self._parse_remote_with_connection(), 1012 "RETURNS": lambda self: self._parse_returns(), 1013 "STRICT": lambda self: self.expression(exp.StrictProperty), 1014 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1015 "ROW": lambda self: self._parse_row(), 1016 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1017 "SAMPLE": lambda self: self.expression( 1018 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1019 ), 1020 "SECURE": lambda self: self.expression(exp.SecureProperty), 1021 "SECURITY": lambda self: self._parse_security(), 1022 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1023 "SETTINGS": lambda self: self._parse_settings_property(), 1024 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1025 "SORTKEY": lambda self: 
self._parse_sortkey(), 1026 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1027 "STABLE": lambda self: self.expression( 1028 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1029 ), 1030 "STORED": lambda self: self._parse_stored(), 1031 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1032 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1033 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1034 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1035 "TO": lambda self: self._parse_to_table(), 1036 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1037 "TRANSFORM": lambda self: self.expression( 1038 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1039 ), 1040 "TTL": lambda self: self._parse_ttl(), 1041 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1042 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1043 "VOLATILE": lambda self: self._parse_volatile_property(), 1044 "WITH": lambda self: self._parse_with_property(), 1045 } 1046 1047 CONSTRAINT_PARSERS = { 1048 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1049 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1050 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1051 "CHARACTER SET": lambda self: self.expression( 1052 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1053 ), 1054 "CHECK": lambda self: self.expression( 1055 exp.CheckColumnConstraint, 1056 this=self._parse_wrapped(self._parse_assignment), 1057 enforced=self._match_text_seq("ENFORCED"), 1058 ), 1059 "COLLATE": lambda self: self.expression( 1060 exp.CollateColumnConstraint, 1061 this=self._parse_identifier() or self._parse_column(), 1062 ), 1063 "COMMENT": lambda self: self.expression( 1064 exp.CommentColumnConstraint, this=self._parse_string() 1065 ), 1066 "COMPRESS": lambda self: self._parse_compress(), 1067 "CLUSTERED": lambda self: self.expression( 1068 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1069 ), 1070 "NONCLUSTERED": lambda self: self.expression( 1071 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1072 ), 1073 "DEFAULT": lambda self: self.expression( 1074 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1075 ), 1076 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1077 "EPHEMERAL": lambda self: self.expression( 1078 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1079 ), 1080 "EXCLUDE": lambda self: self.expression( 1081 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1082 ), 1083 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1084 "FORMAT": lambda self: self.expression( 1085 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1086 ), 1087 "GENERATED": lambda self: self._parse_generated_as_identity(), 1088 "IDENTITY": lambda self: self._parse_auto_increment(), 1089 "INLINE": lambda self: self._parse_inline(), 1090 "LIKE": lambda self: self._parse_create_like(), 1091 "NOT": lambda self: self._parse_not_constraint(), 1092 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1093 "ON": lambda self: ( 1094 self._match(TokenType.UPDATE) 1095 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1096 ) 1097 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1098 
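# Illustrative sketch (public sqlglot API; not part of this module): CONSTRAINT_PARSERS
# entries fire on the keyword that introduces a column constraint.
#
#     import sqlglot
#     from sqlglot import exp
#
#     ast = sqlglot.parse_one("CREATE TABLE t (x INT NOT NULL DEFAULT 0)")
#     assert ast.find(exp.NotNullColumnConstraint) is not None  # via the "NOT" entry
#     assert ast.find(exp.DefaultColumnConstraint) is not None  # via the "DEFAULT" entry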
"PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1099 "PERIOD": lambda self: self._parse_period_for_system_time(), 1100 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1101 "REFERENCES": lambda self: self._parse_references(match=False), 1102 "TITLE": lambda self: self.expression( 1103 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1104 ), 1105 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1106 "UNIQUE": lambda self: self._parse_unique(), 1107 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1108 "WATERMARK": lambda self: self.expression( 1109 exp.WatermarkColumnConstraint, 1110 this=self._match(TokenType.FOR) and self._parse_column(), 1111 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1112 ), 1113 "WITH": lambda self: self.expression( 1114 exp.Properties, expressions=self._parse_wrapped_properties() 1115 ), 1116 "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1117 "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1118 } 1119 1120 def _parse_partitioned_by_bucket_or_truncate(self) -> exp.Expression: 1121 klass = ( 1122 exp.PartitionedByBucket 1123 if self._prev.text.upper() == "BUCKET" 1124 else exp.PartitionByTruncate 1125 ) 1126 1127 args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column()) 1128 this, expression = seq_get(args, 0), seq_get(args, 1) 1129 1130 if isinstance(this, exp.Literal): 1131 # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order 1132 # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)` 1133 # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)` 1134 # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)` 1135 # 1136 # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning 1137 # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties 1138 this, expression = expression, this 1139 1140 return self.expression(klass, this=this, expression=expression) 1141 1142 ALTER_PARSERS = { 1143 "ADD": lambda self: self._parse_alter_table_add(), 1144 "AS": lambda self: self._parse_select(), 1145 "ALTER": lambda self: self._parse_alter_table_alter(), 1146 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1147 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1148 "DROP": lambda self: self._parse_alter_table_drop(), 1149 "RENAME": lambda self: self._parse_alter_table_rename(), 1150 "SET": lambda self: self._parse_alter_table_set(), 1151 "SWAP": lambda self: self.expression( 1152 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1153 ), 1154 } 1155 1156 ALTER_ALTER_PARSERS = { 1157 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1158 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1159 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1160 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1161 } 1162 1163 SCHEMA_UNNAMED_CONSTRAINTS = { 1164 "CHECK", 1165 "EXCLUDE", 1166 "FOREIGN KEY", 1167 "LIKE", 1168 "PERIOD", 1169 "PRIMARY KEY", 1170 "UNIQUE", 1171 "WATERMARK", 1172 "BUCKET", 1173 "TRUNCATE", 1174 } 1175 1176 NO_PAREN_FUNCTION_PARSERS = { 1177 "ANY": lambda 
self: self.expression(exp.Any, this=self._parse_bitwise()), 1178 "CASE": lambda self: self._parse_case(), 1179 "CONNECT_BY_ROOT": lambda self: self.expression( 1180 exp.ConnectByRoot, this=self._parse_column() 1181 ), 1182 "IF": lambda self: self._parse_if(), 1183 } 1184 1185 INVALID_FUNC_NAME_TOKENS = { 1186 TokenType.IDENTIFIER, 1187 TokenType.STRING, 1188 } 1189 1190 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1191 1192 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1193 1194 FUNCTION_PARSERS = { 1195 **{ 1196 name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names() 1197 }, 1198 **{ 1199 name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names() 1200 }, 1201 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1202 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1203 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1204 "DECODE": lambda self: self._parse_decode(), 1205 "EXTRACT": lambda self: self._parse_extract(), 1206 "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1207 "GAP_FILL": lambda self: self._parse_gap_fill(), 1208 "JSON_OBJECT": lambda self: self._parse_json_object(), 1209 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1210 "JSON_TABLE": lambda self: self._parse_json_table(), 1211 "MATCH": lambda self: self._parse_match_against(), 1212 "NORMALIZE": lambda self: self._parse_normalize(), 1213 "OPENJSON": lambda self: self._parse_open_json(), 1214 "OVERLAY": lambda self: self._parse_overlay(), 1215 "POSITION": lambda self: self._parse_position(), 1216 "PREDICT": lambda self: self._parse_predict(), 1217 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1218 "STRING_AGG": lambda self: self._parse_string_agg(), 1219 "SUBSTRING": lambda self: self._parse_substring(), 1220 "TRIM": lambda self: self._parse_trim(), 1221 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1222 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1223 "XMLELEMENT": lambda self: self.expression( 1224 exp.XMLElement, 1225 this=self._match_text_seq("NAME") and self._parse_id_var(), 1226 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1227 ), 1228 "XMLTABLE": lambda self: self._parse_xml_table(), 1229 } 1230 1231 QUERY_MODIFIER_PARSERS = { 1232 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1233 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1234 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1235 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1236 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1237 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1238 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1239 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1240 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1241 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1242 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1243 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1244 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1245 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1246 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1247 TokenType.CLUSTER_BY: lambda self: ( 1248 "cluster", 1249 
self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1250 ), 1251 TokenType.DISTRIBUTE_BY: lambda self: ( 1252 "distribute", 1253 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1254 ), 1255 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1256 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1257 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1258 } 1259 1260 SET_PARSERS = { 1261 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1262 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1263 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1264 "TRANSACTION": lambda self: self._parse_set_transaction(), 1265 } 1266 1267 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1268 1269 TYPE_LITERAL_PARSERS = { 1270 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1271 } 1272 1273 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1274 1275 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1276 1277 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1278 1279 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1280 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1281 "ISOLATION": ( 1282 ("LEVEL", "REPEATABLE", "READ"), 1283 ("LEVEL", "READ", "COMMITTED"), 1284 ("LEVEL", "READ", "UNCOMMITTED"), 1285 ("LEVEL", "SERIALIZABLE"), 1286 ), 1287 "READ": ("WRITE", "ONLY"), 1288 } 1289 1290 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1291 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1292 ) 1293 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1294 1295 CREATE_SEQUENCE: OPTIONS_TYPE = { 1296 "SCALE": ("EXTEND", "NOEXTEND"), 1297 "SHARD": ("EXTEND", "NOEXTEND"), 1298 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1299 **dict.fromkeys( 1300 ( 1301 "SESSION", 1302 "GLOBAL", 1303 "KEEP", 1304 "NOKEEP", 1305 "ORDER", 1306 "NOORDER", 1307 "NOCACHE", 1308 "CYCLE", 1309 "NOCYCLE", 1310 "NOMINVALUE", 1311 "NOMAXVALUE", 1312 "NOSCALE", 1313 "NOSHARD", 1314 ), 1315 tuple(), 1316 ), 1317 } 1318 1319 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1320 1321 USABLES: OPTIONS_TYPE = dict.fromkeys( 1322 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1323 ) 1324 1325 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1326 1327 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1328 "TYPE": ("EVOLUTION",), 1329 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1330 } 1331 1332 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1333 1334 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1335 1336 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1337 "NOT": ("ENFORCED",), 1338 "MATCH": ( 1339 "FULL", 1340 "PARTIAL", 1341 "SIMPLE", 1342 ), 1343 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1344 "USING": ( 1345 "BTREE", 1346 "HASH", 1347 ), 1348 **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()), 1349 } 1350 1351 WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = { 1352 "NO": ("OTHERS",), 1353 "CURRENT": ("ROW",), 1354 **dict.fromkeys(("GROUP", "TIES"), tuple()), 1355 } 1356 1357 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1358 1359 CLONE_KEYWORDS = {"CLONE", "COPY"} 1360 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1361 HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"}
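# Note (illustration only): tables annotated with OPTIONS_TYPE map a leading keyword to the
# follower sequences it permits; an empty tuple means the keyword stands alone. For example,
# TRANSACTION_CHARACTERISTICS above matches "ISOLATION LEVEL READ COMMITTED" by consuming
# "ISOLATION" and then one of its ("LEVEL", ...) tuples. Such tables are consumed with
# _parse_var_from_options, e.g. a call like:
#
#     self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)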
1362 1363 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1364 1365 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1366 1367 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1368 1369 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1370 1371 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1372 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1373 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1374 1375 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1376 1377 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1378 1379 ADD_CONSTRAINT_TOKENS = { 1380 TokenType.CONSTRAINT, 1381 TokenType.FOREIGN_KEY, 1382 TokenType.INDEX, 1383 TokenType.KEY, 1384 TokenType.PRIMARY_KEY, 1385 TokenType.UNIQUE, 1386 } 1387 1388 DISTINCT_TOKENS = {TokenType.DISTINCT} 1389 1390 NULL_TOKENS = {TokenType.NULL} 1391 1392 UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS 1393 1394 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1395 1396 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1397 1398 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1399 1400 ODBC_DATETIME_LITERALS = { 1401 "d": exp.Date, 1402 "t": exp.Time, 1403 "ts": exp.Timestamp, 1404 } 1405 1406 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1407 1408 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1409 1410 # The style options for the DESCRIBE statement 1411 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1412 1413 # The style options for the ANALYZE statement 1414 ANALYZE_STYLES = { 1415 "BUFFER_USAGE_LIMIT", 1416 "FULL", 1417 "LOCAL", 1418 "NO_WRITE_TO_BINLOG", 1419 "SAMPLE", 1420 "SKIP_LOCKED", 1421 "VERBOSE", 1422 } 1423 1424 ANALYZE_EXPRESSION_PARSERS = { 1425 "ALL": lambda self: self._parse_analyze_columns(), 1426 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1427 "DELETE": lambda self: self._parse_analyze_delete(), 1428 "DROP": lambda self: self._parse_analyze_histogram(), 1429 "ESTIMATE": lambda self: self._parse_analyze_statistics(), 1430 "LIST": lambda self: self._parse_analyze_list(), 1431 "PREDICATE": lambda self: self._parse_analyze_columns(), 1432 "UPDATE": lambda self: self._parse_analyze_histogram(), 1433 "VALIDATE": lambda self: self._parse_analyze_validate(), 1434 } 1435 1436 PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"} 1437 1438 AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET) 1439 1440 OPERATION_MODIFIERS: t.Set[str] = set() 1441 1442 RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"} 1443 1444 MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows) 1445 1446 STRICT_CAST = True 1447 1448 PREFIXED_PIVOT_COLUMNS = False 1449 IDENTIFY_PIVOT_STRINGS = False 1450 1451 LOG_DEFAULTS_TO_LN = False 1452 1453 # Whether ADD is present for each column added by ALTER TABLE 1454 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1455 1456 # Whether the table sample clause expects CSV syntax 1457 TABLESAMPLE_CSV = False 1458 1459 # The default method used for table sampling 1460 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1461 1462 # Whether the SET command needs a delimiter (e.g. 
"=") for assignments 1463 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1464 1465 # Whether the TRIM function expects the characters to trim as its first argument 1466 TRIM_PATTERN_FIRST = False 1467 1468 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1469 STRING_ALIASES = False 1470 1471 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1472 MODIFIERS_ATTACHED_TO_SET_OP = True 1473 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1474 1475 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1476 NO_PAREN_IF_COMMANDS = True 1477 1478 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1479 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1480 1481 # Whether the `:` operator is used to extract a value from a VARIANT column 1482 COLON_IS_VARIANT_EXTRACT = False 1483 1484 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1485 # If this is True and '(' is not found, the keyword will be treated as an identifier 1486 VALUES_FOLLOWED_BY_PAREN = True 1487 1488 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1489 SUPPORTS_IMPLICIT_UNNEST = False 1490 1491 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1492 INTERVAL_SPANS = True 1493 1494 # Whether a PARTITION clause can follow a table reference 1495 SUPPORTS_PARTITION_SELECTION = False 1496 1497 # Whether the `name AS expr` schema/column constraint requires parentheses around `expr` 1498 WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True 1499 1500 # Whether the 'AS' keyword is optional in the CTE definition syntax 1501 OPTIONAL_ALIAS_TOKEN_CTE = True 1502 1503 __slots__ = ( 1504 "error_level", 1505 "error_message_context", 1506 "max_errors", 1507 "dialect", 1508 "sql", 1509 "errors", 1510 "_tokens", 1511 "_index", 1512 "_curr", 1513 "_next", 1514 "_prev", 1515 "_prev_comments", 1516 ) 1517 1518 # Autofilled 1519 SHOW_TRIE: t.Dict = {} 1520 SET_TRIE: t.Dict = {} 1521 1522 def __init__( 1523 self, 1524 error_level: t.Optional[ErrorLevel] = None, 1525 error_message_context: int = 100, 1526 max_errors: int = 3, 1527 dialect: DialectType = None, 1528 ): 1529 from sqlglot.dialects import Dialect 1530 1531 self.error_level = error_level or ErrorLevel.IMMEDIATE 1532 self.error_message_context = error_message_context 1533 self.max_errors = max_errors 1534 self.dialect = Dialect.get_or_raise(dialect) 1535 self.reset() 1536 1537 def reset(self): 1538 self.sql = "" 1539 self.errors = [] 1540 self._tokens = [] 1541 self._index = 0 1542 self._curr = None 1543 self._next = None 1544 self._prev = None 1545 self._prev_comments = None 1546 1547 def parse( 1548 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1549 ) -> t.List[t.Optional[exp.Expression]]: 1550 """ 1551 Parses a list of tokens and returns a list of syntax trees, one tree 1552 per parsed SQL statement. 1553 1554 Args: 1555 raw_tokens: The list of tokens. 1556 sql: The original SQL string, used to produce helpful debug messages. 1557 1558 Returns: 1559 The list of the produced syntax trees. 1560 """ 1561 return self._parse( 1562 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1563 ) 1564 1565 def parse_into( 1566 self, 1567 expression_types: exp.IntoType, 1568 raw_tokens: t.List[Token], 1569 sql: t.Optional[str] = None, 1570 ) -> t.List[t.Optional[exp.Expression]]: 1571 """ 1572 Parses a list of tokens into a given Expression type. 
If a collection of Expression 1573 types is given instead, this method will try to parse the token list into each one 1574 of them, stopping at the first for which the parsing succeeds. 1575 1576 Args: 1577 expression_types: The expression type(s) to try and parse the token list into. 1578 raw_tokens: The list of tokens. 1579 sql: The original SQL string, used to produce helpful debug messages. 1580 1581 Returns: 1582 The target Expression. 1583 """ 1584 errors = [] 1585 for expression_type in ensure_list(expression_types): 1586 parser = self.EXPRESSION_PARSERS.get(expression_type) 1587 if not parser: 1588 raise TypeError(f"No parser registered for {expression_type}") 1589 1590 try: 1591 return self._parse(parser, raw_tokens, sql) 1592 except ParseError as e: 1593 e.errors[0]["into_expression"] = expression_type 1594 errors.append(e) 1595 1596 raise ParseError( 1597 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1598 errors=merge_errors(errors), 1599 ) from errors[-1] 1600 1601 def _parse( 1602 self, 1603 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1604 raw_tokens: t.List[Token], 1605 sql: t.Optional[str] = None, 1606 ) -> t.List[t.Optional[exp.Expression]]: 1607 self.reset() 1608 self.sql = sql or "" 1609 1610 total = len(raw_tokens) 1611 chunks: t.List[t.List[Token]] = [[]] 1612 1613 for i, token in enumerate(raw_tokens): 1614 if token.token_type == TokenType.SEMICOLON: 1615 if token.comments: 1616 chunks.append([token]) 1617 1618 if i < total - 1: 1619 chunks.append([]) 1620 else: 1621 chunks[-1].append(token) 1622 1623 expressions = [] 1624 1625 for tokens in chunks: 1626 self._index = -1 1627 self._tokens = tokens 1628 self._advance() 1629 1630 expressions.append(parse_method(self)) 1631 1632 if self._index < len(self._tokens): 1633 self.raise_error("Invalid expression / Unexpected token") 1634 1635 self.check_errors() 1636 1637 return expressions 1638 1639 def check_errors(self) -> None: 1640 """Logs or raises any found errors, depending on the chosen error level setting.""" 1641 if self.error_level == ErrorLevel.WARN: 1642 for error in self.errors: 1643 logger.error(str(error)) 1644 elif self.error_level == ErrorLevel.RAISE and self.errors: 1645 raise ParseError( 1646 concat_messages(self.errors, self.max_errors), 1647 errors=merge_errors(self.errors), 1648 ) 1649 1650 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1651 """ 1652 Appends an error to the list of recorded errors or raises it, depending on the chosen 1653 error level setting. 1654 """ 1655 token = token or self._curr or self._prev or Token.string("") 1656 start = token.start 1657 end = token.end + 1 1658 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1659 highlight = self.sql[start:end] 1660 end_context = self.sql[end : end + self.error_message_context] 1661 1662 error = ParseError.new( 1663 f"{message}. Line {token.line}, Col: {token.col}.\n" 1664 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1665 description=message, 1666 line=token.line, 1667 col=token.col, 1668 start_context=start_context, 1669 highlight=highlight, 1670 end_context=end_context, 1671 ) 1672 1673 if self.error_level == ErrorLevel.IMMEDIATE: 1674 raise error 1675 1676 self.errors.append(error) 1677 1678 def expression( 1679 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1680 ) -> E: 1681 """ 1682 Creates a new, validated Expression. 1683 1684 Args: 1685 exp_class: The expression class to instantiate.
1686 comments: An optional list of comments to attach to the expression. 1687 kwargs: The arguments to set for the expression along with their respective values. 1688 1689 Returns: 1690 The target expression. 1691 """ 1692 instance = exp_class(**kwargs) 1693 instance.add_comments(comments) if comments else self._add_comments(instance) 1694 return self.validate_expression(instance) 1695 1696 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1697 if expression and self._prev_comments: 1698 expression.add_comments(self._prev_comments) 1699 self._prev_comments = None 1700 1701 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1702 """ 1703 Validates an Expression, making sure that all its mandatory arguments are set. 1704 1705 Args: 1706 expression: The expression to validate. 1707 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1708 1709 Returns: 1710 The validated expression. 1711 """ 1712 if self.error_level != ErrorLevel.IGNORE: 1713 for error_message in expression.error_messages(args): 1714 self.raise_error(error_message) 1715 1716 return expression 1717 1718 def _find_sql(self, start: Token, end: Token) -> str: 1719 return self.sql[start.start : end.end + 1] 1720 1721 def _is_connected(self) -> bool: 1722 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1723 1724 def _advance(self, times: int = 1) -> None: 1725 self._index += times 1726 self._curr = seq_get(self._tokens, self._index) 1727 self._next = seq_get(self._tokens, self._index + 1) 1728 1729 if self._index > 0: 1730 self._prev = self._tokens[self._index - 1] 1731 self._prev_comments = self._prev.comments 1732 else: 1733 self._prev = None 1734 self._prev_comments = None 1735 1736 def _retreat(self, index: int) -> None: 1737 if index != self._index: 1738 self._advance(index - self._index) 1739 1740 def _warn_unsupported(self) -> None: 1741 if len(self._tokens) <= 1: 1742 return 1743 1744 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1745 # interested in emitting a warning for the one being currently processed. 1746 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1747 1748 logger.warning( 1749 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1750 ) 1751 1752 def _parse_command(self) -> exp.Command: 1753 self._warn_unsupported() 1754 return self.expression( 1755 exp.Command, 1756 comments=self._prev_comments, 1757 this=self._prev.text.upper(), 1758 expression=self._parse_string(), 1759 ) 1760 1761 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1762 """ 1763 Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
1764 This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to 1765 solve this by setting & resetting the parser state accordingly. 1766 """ 1767 index = self._index 1768 error_level = self.error_level 1769 1770 self.error_level = ErrorLevel.IMMEDIATE 1771 try: 1772 this = parse_method() 1773 except ParseError: 1774 this = None 1775 finally: 1776 if not this or retreat: 1777 self._retreat(index) 1778 self.error_level = error_level 1779 1780 return this 1781 1782 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1783 start = self._prev 1784 exists = self._parse_exists() if allow_exists else None 1785 1786 self._match(TokenType.ON) 1787 1788 materialized = self._match_text_seq("MATERIALIZED") 1789 kind = self._match_set(self.CREATABLES) and self._prev 1790 if not kind: 1791 return self._parse_as_command(start) 1792 1793 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1794 this = self._parse_user_defined_function(kind=kind.token_type) 1795 elif kind.token_type == TokenType.TABLE: 1796 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1797 elif kind.token_type == TokenType.COLUMN: 1798 this = self._parse_column() 1799 else: 1800 this = self._parse_id_var() 1801 1802 self._match(TokenType.IS) 1803 1804 return self.expression( 1805 exp.Comment, 1806 this=this, 1807 kind=kind.text, 1808 expression=self._parse_string(), 1809 exists=exists, 1810 materialized=materialized, 1811 ) 1812 1813 def _parse_to_table( 1814 self, 1815 ) -> exp.ToTableProperty: 1816 table = self._parse_table_parts(schema=True) 1817 return self.expression(exp.ToTableProperty, this=table) 1818 1819 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1820 def _parse_ttl(self) -> exp.Expression: 1821 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1822 this = self._parse_bitwise() 1823 1824 if self._match_text_seq("DELETE"): 1825 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1826 if self._match_text_seq("RECOMPRESS"): 1827 return self.expression( 1828 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1829 ) 1830 if self._match_text_seq("TO", "DISK"): 1831 return self.expression( 1832 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1833 ) 1834 if self._match_text_seq("TO", "VOLUME"): 1835 return self.expression( 1836 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1837 ) 1838 1839 return this 1840 1841 expressions = self._parse_csv(_parse_ttl_action) 1842 where = self._parse_where() 1843 group = self._parse_group() 1844 1845 aggregates = None 1846 if group and self._match(TokenType.SET): 1847 aggregates = self._parse_csv(self._parse_set_item) 1848 1849 return self.expression( 1850 exp.MergeTreeTTL, 1851 expressions=expressions, 1852 where=where, 1853 group=group, 1854 aggregates=aggregates, 1855 ) 1856 1857 def _parse_statement(self) -> t.Optional[exp.Expression]: 1858 if self._curr is None: 1859 return None 1860 1861 if self._match_set(self.STATEMENT_PARSERS): 1862 comments = self._prev_comments 1863 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 1864 stmt.add_comments(comments, prepend=True) 1865 return stmt 1866 1867 if self._match_set(self.dialect.tokenizer.COMMANDS): 1868 return self._parse_command() 1869 1870 expression = self._parse_expression() 1871 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1872 return self._parse_query_modifiers(expression)
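# Illustrative sketch (public sqlglot API; not part of this module): the end-to-end flow that
# _parse_statement() anchors - tokenize, then parse one tree per semicolon-separated statement.
#
#     import sqlglot
#     from sqlglot.errors import ErrorLevel
#     from sqlglot.parser import Parser
#     from sqlglot.tokens import Tokenizer
#
#     tokens = Tokenizer().tokenize("SELECT a FROM t; DROP TABLE t")
#     trees = Parser(error_level=ErrorLevel.RAISE).parse(tokens)
#     assert len(trees) == 2  # one syntax tree per statement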
1873 1874 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1875 start = self._prev 1876 temporary = self._match(TokenType.TEMPORARY) 1877 materialized = self._match_text_seq("MATERIALIZED") 1878 1879 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1880 if not kind: 1881 return self._parse_as_command(start) 1882 1883 concurrently = self._match_text_seq("CONCURRENTLY") 1884 if_exists = exists or self._parse_exists() 1885 1886 if kind == "COLUMN": 1887 this = self._parse_column() 1888 else: 1889 this = self._parse_table_parts( 1890 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1891 ) 1892 1893 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1894 1895 if self._match(TokenType.L_PAREN, advance=False): 1896 expressions = self._parse_wrapped_csv(self._parse_types) 1897 else: 1898 expressions = None 1899 1900 return self.expression( 1901 exp.Drop, 1902 exists=if_exists, 1903 this=this, 1904 expressions=expressions, 1905 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1906 temporary=temporary, 1907 materialized=materialized, 1908 cascade=self._match_text_seq("CASCADE"), 1909 constraints=self._match_text_seq("CONSTRAINTS"), 1910 purge=self._match_text_seq("PURGE"), 1911 cluster=cluster, 1912 concurrently=concurrently, 1913 ) 1914 1915 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1916 return ( 1917 self._match_text_seq("IF") 1918 and (not not_ or self._match(TokenType.NOT)) 1919 and self._match(TokenType.EXISTS) 1920 ) 1921 1922 def _parse_create(self) -> exp.Create | exp.Command: 1923 # Note: this can't be None because we've matched a statement parser 1924 start = self._prev 1925 1926 replace = ( 1927 start.token_type == TokenType.REPLACE 1928 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1929 or self._match_pair(TokenType.OR, TokenType.ALTER) 1930 ) 1931 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1932 1933 unique = self._match(TokenType.UNIQUE) 1934 1935 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1936 clustered = True 1937 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1938 "COLUMNSTORE" 1939 ): 1940 clustered = False 1941 else: 1942 clustered = None 1943 1944 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1945 self._advance() 1946 1947 properties = None 1948 create_token = self._match_set(self.CREATABLES) and self._prev 1949 1950 if not create_token: 1951 # exp.Properties.Location.POST_CREATE 1952 properties = self._parse_properties() 1953 create_token = self._match_set(self.CREATABLES) and self._prev 1954 1955 if not properties or not create_token: 1956 return self._parse_as_command(start) 1957 1958 concurrently = self._match_text_seq("CONCURRENTLY") 1959 exists = self._parse_exists(not_=True) 1960 this = None 1961 expression: t.Optional[exp.Expression] = None 1962 indexes = None 1963 no_schema_binding = None 1964 begin = None 1965 end = None 1966 clone = None 1967 1968 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1969 nonlocal properties 1970 if properties and temp_props: 1971 properties.expressions.extend(temp_props.expressions) 1972 elif temp_props: 1973 properties = temp_props 1974 1975 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1976 this = self._parse_user_defined_function(kind=create_token.token_type) 1977 1978 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1979 
extend_props(self._parse_properties()) 1980 1981 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1982 extend_props(self._parse_properties()) 1983 1984 if not expression: 1985 if self._match(TokenType.COMMAND): 1986 expression = self._parse_as_command(self._prev) 1987 else: 1988 begin = self._match(TokenType.BEGIN) 1989 return_ = self._match_text_seq("RETURN") 1990 1991 if self._match(TokenType.STRING, advance=False): 1992 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1993 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1994 expression = self._parse_string() 1995 extend_props(self._parse_properties()) 1996 else: 1997 expression = self._parse_user_defined_function_expression() 1998 1999 end = self._match_text_seq("END") 2000 2001 if return_: 2002 expression = self.expression(exp.Return, this=expression) 2003 elif create_token.token_type == TokenType.INDEX: 2004 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c) 2005 if not self._match(TokenType.ON): 2006 index = self._parse_id_var() 2007 anonymous = False 2008 else: 2009 index = None 2010 anonymous = True 2011 2012 this = self._parse_index(index=index, anonymous=anonymous) 2013 elif create_token.token_type in self.DB_CREATABLES: 2014 table_parts = self._parse_table_parts( 2015 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 2016 ) 2017 2018 # exp.Properties.Location.POST_NAME 2019 self._match(TokenType.COMMA) 2020 extend_props(self._parse_properties(before=True)) 2021 2022 this = self._parse_schema(this=table_parts) 2023 2024 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2025 extend_props(self._parse_properties()) 2026 2027 has_alias = self._match(TokenType.ALIAS) 2028 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2029 # exp.Properties.Location.POST_ALIAS 2030 extend_props(self._parse_properties()) 2031 2032 if create_token.token_type == TokenType.SEQUENCE: 2033 expression = self._parse_types() 2034 extend_props(self._parse_properties()) 2035 else: 2036 expression = self._parse_ddl_select() 2037 2038 # Some dialects also support using a table as an alias instead of a SELECT. 2039 # Here we fall back to this as an alternative.
            if not expression and has_alias:
                expression = self._try_parse(self._parse_table_parts)

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())
                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True
            elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE):
                extend_props(self._parse_properties())

            shallow = self._match_text_seq("SHALLOW")

            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self.expression(
                    exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
                )

        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            return self._parse_as_command(start)

        create_kind_text = create_token.text.upper()
        return self.expression(
            exp.Create,
            this=this,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text,
            replace=replace,
            refresh=refresh,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
            concurrently=concurrently,
            clustered=clustered,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            self._match(TokenType.COMMA)
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        return None if self._index == index else seq

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
self._match_texts(("MIN", "MINIMUM")), 2143 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2144 } 2145 2146 if self._match_texts(self.PROPERTY_PARSERS): 2147 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2148 try: 2149 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2150 except TypeError: 2151 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2152 2153 return None 2154 2155 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2156 return self._parse_wrapped_csv(self._parse_property) 2157 2158 def _parse_property(self) -> t.Optional[exp.Expression]: 2159 if self._match_texts(self.PROPERTY_PARSERS): 2160 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2161 2162 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2163 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2164 2165 if self._match_text_seq("COMPOUND", "SORTKEY"): 2166 return self._parse_sortkey(compound=True) 2167 2168 if self._match_text_seq("SQL", "SECURITY"): 2169 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2170 2171 index = self._index 2172 key = self._parse_column() 2173 2174 if not self._match(TokenType.EQ): 2175 self._retreat(index) 2176 return self._parse_sequence_properties() 2177 2178 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2179 if isinstance(key, exp.Column): 2180 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2181 2182 value = self._parse_bitwise() or self._parse_var(any_token=True) 2183 2184 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2185 if isinstance(value, exp.Column): 2186 value = exp.var(value.name) 2187 2188 return self.expression(exp.Property, this=key, value=value) 2189 2190 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2191 if self._match_text_seq("BY"): 2192 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2193 2194 self._match(TokenType.ALIAS) 2195 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2196 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2197 2198 return self.expression( 2199 exp.FileFormatProperty, 2200 this=( 2201 self.expression( 2202 exp.InputOutputFormat, 2203 input_format=input_format, 2204 output_format=output_format, 2205 ) 2206 if input_format or output_format 2207 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2208 ), 2209 ) 2210 2211 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2212 field = self._parse_field() 2213 if isinstance(field, exp.Identifier) and not field.quoted: 2214 field = exp.var(field) 2215 2216 return field 2217 2218 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2219 self._match(TokenType.EQ) 2220 self._match(TokenType.ALIAS) 2221 2222 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2223 2224 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2225 properties = [] 2226 while True: 2227 if before: 2228 prop = self._parse_property_before() 2229 else: 2230 prop = self._parse_property() 2231 if not prop: 2232 break 2233 for p in ensure_list(prop): 2234 properties.append(p) 2235 2236 if properties: 2237 return self.expression(exp.Properties, expressions=properties) 2238 2239 return None 2240 2241 
    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_security(self) -> t.Optional[exp.SecurityProperty]:
        if self._match_texts(("NONE", "DEFINER", "INVOKER")):
            security_specifier = self._prev.text.upper()
            return self.expression(exp.SecurityProperty, this=security_specifier)
        return None

    def _parse_settings_property(self) -> exp.SettingsProperty:
        return self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment)
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_retention_period(self) -> exp.Var:
        # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        unit = self._parse_var(any_token=True)
        return exp.var(f"{number_str}{unit}")

    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        self._match(TokenType.EQ)
        prop = self.expression(
            exp.WithSystemVersioningProperty,
            **{  # type: ignore
                "on": True,
                "with": with_,
            },
        )

        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
        self._match(TokenType.EQ)
        on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
        prop = self.expression(exp.DataDeletionProperty, on=on)

        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILTER_COLUMN", "="):
                    prop.set("filter_column", self._parse_column())
                elif self._match_text_seq("RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_distributed_property(self) -> exp.DistributedByProperty:
        kind = "HASH"
        expressions: t.Optional[t.List[exp.Expression]] = None
        if self._match_text_seq("BY", "HASH"):
            expressions = self._parse_wrapped_csv(self._parse_id_var)
        elif self._match_text_seq("BY", "RANDOM"):
            kind = "RANDOM"

        # If the BUCKETS keyword is not present, the number of buckets is AUTO
        buckets: t.Optional[exp.Expression] = None
        if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"):
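            # e.g. (hedged example) DISTRIBUTED BY HASH (id) BUCKETS 10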
            buckets = self._parse_number()

        return self.expression(
            exp.DistributedByProperty,
            expressions=expressions,
            kind=kind,
            buckets=buckets,
            order=self._parse_order(),
        )

    def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E:
        self._match_text_seq("KEY")
        expressions = self._parse_wrapped_id_vars()
        return self.expression(expr_type, expressions=expressions)

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if self._match(TokenType.SCHEMA):
            return self.expression(
                exp.WithSchemaBindingProperty,
                this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS),
            )

        if self._match_texts(self.PROCEDURE_OPTIONS, advance=False):
            return self.expression(
                exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option)
            )

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    def _parse_procedure_option(self) -> exp.Expression | None:
        if self._match_text_seq("EXECUTE", "AS"):
            return self.expression(
                exp.ExecuteAsProperty,
                this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False)
                or self._parse_string(),
            )

        return self._parse_var_from_options(self.PROCEDURE_OPTIONS)

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
self._match_text_seq("DATABASE"): 2555 kind = "DATABASE" 2556 else: 2557 kind = None 2558 2559 if kind in ("DATABASE", "TABLE", "VIEW"): 2560 this = self._parse_table_parts() 2561 else: 2562 this = None 2563 2564 if self._match(TokenType.FOR): 2565 for_or_in = "FOR" 2566 elif self._match(TokenType.IN): 2567 for_or_in = "IN" 2568 else: 2569 for_or_in = None 2570 2571 if self._match_text_seq("ACCESS"): 2572 lock_type = "ACCESS" 2573 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2574 lock_type = "EXCLUSIVE" 2575 elif self._match_text_seq("SHARE"): 2576 lock_type = "SHARE" 2577 elif self._match_text_seq("READ"): 2578 lock_type = "READ" 2579 elif self._match_text_seq("WRITE"): 2580 lock_type = "WRITE" 2581 elif self._match_text_seq("CHECKSUM"): 2582 lock_type = "CHECKSUM" 2583 else: 2584 lock_type = None 2585 2586 override = self._match_text_seq("OVERRIDE") 2587 2588 return self.expression( 2589 exp.LockingProperty, 2590 this=this, 2591 kind=kind, 2592 for_or_in=for_or_in, 2593 lock_type=lock_type, 2594 override=override, 2595 ) 2596 2597 def _parse_partition_by(self) -> t.List[exp.Expression]: 2598 if self._match(TokenType.PARTITION_BY): 2599 return self._parse_csv(self._parse_assignment) 2600 return [] 2601 2602 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2603 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2604 if self._match_text_seq("MINVALUE"): 2605 return exp.var("MINVALUE") 2606 if self._match_text_seq("MAXVALUE"): 2607 return exp.var("MAXVALUE") 2608 return self._parse_bitwise() 2609 2610 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2611 expression = None 2612 from_expressions = None 2613 to_expressions = None 2614 2615 if self._match(TokenType.IN): 2616 this = self._parse_wrapped_csv(self._parse_bitwise) 2617 elif self._match(TokenType.FROM): 2618 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2619 self._match_text_seq("TO") 2620 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2621 elif self._match_text_seq("WITH", "(", "MODULUS"): 2622 this = self._parse_number() 2623 self._match_text_seq(",", "REMAINDER") 2624 expression = self._parse_number() 2625 self._match_r_paren() 2626 else: 2627 self.raise_error("Failed to parse partition bound spec.") 2628 2629 return self.expression( 2630 exp.PartitionBoundSpec, 2631 this=this, 2632 expression=expression, 2633 from_expressions=from_expressions, 2634 to_expressions=to_expressions, 2635 ) 2636 2637 # https://www.postgresql.org/docs/current/sql-createtable.html 2638 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2639 if not self._match_text_seq("OF"): 2640 self._retreat(self._index - 1) 2641 return None 2642 2643 this = self._parse_table(schema=True) 2644 2645 if self._match(TokenType.DEFAULT): 2646 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2647 elif self._match_text_seq("FOR", "VALUES"): 2648 expression = self._parse_partition_bound_spec() 2649 else: 2650 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2651 2652 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2653 2654 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2655 self._match(TokenType.EQ) 2656 return self.expression( 2657 exp.PartitionedByProperty, 2658 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2659 ) 2660 2661 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2662 if self._match_text_seq("AND", "STATISTICS"): 2663 
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        null = None
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
            value = None
        else:
            value = self._parse_types()
        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            style = None
            self._retreat(self._index - 2)

        format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None

        if self._match_set(self.STATEMENT_PARSERS, advance=False):
            this = self._parse_statement()
        else:
            this = self._parse_table(schema=True)

        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        partition = self._parse_partition()
        return self.expression(
            exp.Describe,
            this=this,
            style=style,
            kind=kind,
            expressions=expressions,
            partition=partition,
            format=format,
        )

    def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts:
        kind = self._prev.text.upper()
        expressions = []

        def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]:
            if self._match(TokenType.WHEN):
                expression = self._parse_disjunction()
                self._match(TokenType.THEN)
            else:
                expression = None

            else_ = self._match(TokenType.ELSE)

            if not self._match(TokenType.INTO):
                return None

            return self.expression(
                exp.ConditionalInsert,
                this=self.expression(
                    exp.Insert,
                    this=self._parse_table(schema=True),
                    expression=self._parse_derived_table_values(),
                ),
                expression=expression,
                else_=else_,
            )

        expression = parse_conditional_insert()
        while expression is not None:
            expressions.append(expression)
            expression = parse_conditional_insert()

        return self.expression(
            exp.MultitableInserts,
            kind=kind,
            comments=comments,
            expressions=expressions,
            source=self._parse_table(),
        )

    def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]:
        comments = []
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match_set((TokenType.FIRST, TokenType.ALL)):
                comments += ensure_list(self._prev_comments)
                return self._parse_multitable_inserts(comments)

            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )
            if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False):
                this.set("alias", self._parse_table_alias())

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
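            # The remaining arguments are all optional; each `_match*` guard
            # short-circuits to a falsy value when its keyword is absent.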
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
            partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(),
            settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
            source=self._match(TokenType.TABLE) and self._parse_table(),
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
            where=self._parse_where(),
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
        index = self._index
        with_ = with_ or self._match_text_seq("WITH")

        if not self._match(TokenType.SERDE_PROPERTIES):
            self._retreat(index)
            return None
        return self.expression(
            exp.SerdeProperties,
            **{  # type: ignore
                "expressions": self._parse_wrapped_properties(),
                "with": with_,
            },
        )

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = self._parse_serde_properties()

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2980 kwargs["fields"] = self._parse_string() 2981 if self._match_text_seq("ESCAPED", "BY"): 2982 kwargs["escaped"] = self._parse_string() 2983 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2984 kwargs["collection_items"] = self._parse_string() 2985 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2986 kwargs["map_keys"] = self._parse_string() 2987 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2988 kwargs["lines"] = self._parse_string() 2989 if self._match_text_seq("NULL", "DEFINED", "AS"): 2990 kwargs["null"] = self._parse_string() 2991 2992 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2993 2994 def _parse_load(self) -> exp.LoadData | exp.Command: 2995 if self._match_text_seq("DATA"): 2996 local = self._match_text_seq("LOCAL") 2997 self._match_text_seq("INPATH") 2998 inpath = self._parse_string() 2999 overwrite = self._match(TokenType.OVERWRITE) 3000 self._match_pair(TokenType.INTO, TokenType.TABLE) 3001 3002 return self.expression( 3003 exp.LoadData, 3004 this=self._parse_table(schema=True), 3005 local=local, 3006 overwrite=overwrite, 3007 inpath=inpath, 3008 partition=self._parse_partition(), 3009 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3010 serde=self._match_text_seq("SERDE") and self._parse_string(), 3011 ) 3012 return self._parse_as_command(self._prev) 3013 3014 def _parse_delete(self) -> exp.Delete: 3015 # This handles MySQL's "Multiple-Table Syntax" 3016 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3017 tables = None 3018 if not self._match(TokenType.FROM, advance=False): 3019 tables = self._parse_csv(self._parse_table) or None 3020 3021 returning = self._parse_returning() 3022 3023 return self.expression( 3024 exp.Delete, 3025 tables=tables, 3026 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3027 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3028 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3029 where=self._parse_where(), 3030 returning=returning or self._parse_returning(), 3031 limit=self._parse_limit(), 3032 ) 3033 3034 def _parse_update(self) -> exp.Update: 3035 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 3036 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 3037 returning = self._parse_returning() 3038 return self.expression( 3039 exp.Update, 3040 **{ # type: ignore 3041 "this": this, 3042 "expressions": expressions, 3043 "from": self._parse_from(joins=True), 3044 "where": self._parse_where(), 3045 "returning": returning or self._parse_returning(), 3046 "order": self._parse_order(), 3047 "limit": self._parse_limit(), 3048 }, 3049 ) 3050 3051 def _parse_use(self) -> exp.Use: 3052 return self.expression( 3053 exp.Use, 3054 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3055 this=self._parse_table(schema=False), 3056 ) 3057 3058 def _parse_uncache(self) -> exp.Uncache: 3059 if not self._match(TokenType.TABLE): 3060 self.raise_error("Expecting TABLE after UNCACHE") 3061 3062 return self.expression( 3063 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3064 ) 3065 3066 def _parse_cache(self) -> exp.Cache: 3067 lazy = self._match_text_seq("LAZY") 3068 self._match(TokenType.TABLE) 3069 table = self._parse_table(schema=True) 3070 3071 options = [] 3072 if self._match_text_seq("OPTIONS"): 3073 self._match_l_paren() 3074 k = 
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match_texts(self.PARTITION_KEYWORDS):
            return None

        return self.expression(
            exp.Partition,
            subpartition=self._prev.text.upper() == "SUBPARTITION",
            expressions=self._parse_wrapped_csv(self._parse_assignment),
        )

    def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]:
        def _parse_value_expression() -> t.Optional[exp.Expression]:
            if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT):
                return exp.var(self._prev.text.upper())
            return self._parse_expression()

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(_parse_value_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        expression = self._parse_expression()
        if expression:
            return self.expression(exp.Tuple, expressions=[expression])
        return None

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()

    def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]:
        if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)):
            this: t.Optional[exp.Expression] = self._parse_simplified_pivot(
                is_unpivot=self._prev.token_type == TokenType.UNPIVOT
            )
        elif self._match(TokenType.FROM):
            from_ = self._parse_from(skip_from_token=True)
            # Support parentheses for duckdb FROM-first syntax
            select = self._parse_select()
            if select:
                select.set("from", from_)
                this = select
            else:
                this = exp.select("*").from_(t.cast(exp.From, from_))
        else:
            this = (
                self._parse_table()
                if table
                else self._parse_select(nested=True, parse_set_operation=False)
            )

        # Transform exp.Values into a exp.Table to pass through parse_query_modifiers
        # in case a modifier (e.g. join) is following
        if table and isinstance(this, exp.Values) and this.alias:
            alias = this.args["alias"].pop()
            this = exp.Table(this=this, alias=alias)

        this = self._parse_query_modifiers(self._parse_set_operations(this))

        return this

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()

            if self._next and not self._next.token_type == TokenType.DOT:
                all_ = self._match(TokenType.ALL)
                distinct = self._match_set(self.DISTINCT_TOKENS)
            else:
                all_, distinct = None, None

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value(values=False) if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            operation_modifiers = []
            while self._curr and self._match_texts(self.OPERATION_MODIFIERS):
                operation_modifiers.append(exp.var(self._prev.text.upper()))

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
                operation_modifiers=operation_modifiers or None,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_wrapped_select(table=table)

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            self._match_r_paren()
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        elif self._match(TokenType.SUMMARIZE):
            table = self._match(TokenType.TABLE)
            this = self._parse_select() or self._parse_string() or self._parse_table()
            return self.expression(exp.Summarize, this=this, table=table)
        elif self._match(TokenType.DESCRIBE):
            this = self._parse_describe()
        elif self._match_text_seq("STREAM"):
            this = self._parse_function()
            if this:
                this = self.expression(exp.Stream, this=this)
            else:
                self._retreat(self._index - 1)
        else:
            this = None
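        # NOTE: `this` may still be None here, in which case the caller treats
        # the result as "no SELECT found" rather than an error.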
        return self._parse_set_operations(this) if parse_set_operation else this

    def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]:
        self._match_text_seq("SEARCH")

        kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper()

        if not kind:
            return None

        self._match_text_seq("FIRST", "BY")

        return self.expression(
            exp.RecursiveWithSearch,
            kind=kind,
            this=self._parse_id_var(),
            expression=self._match_text_seq("SET") and self._parse_id_var(),
            using=self._match_text_seq("USING") and self._parse_id_var(),
        )

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        last_comments = None
        expressions = []
        while True:
            cte = self._parse_cte()
            if isinstance(cte, exp.CTE):
                expressions.append(cte)
                if last_comments:
                    cte.add_comments(last_comments)

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

            last_comments = self._prev_comments

        return self.expression(
            exp.With,
            comments=comments,
            expressions=expressions,
            recursive=recursive,
            search=self._parse_recursive_with_search(),
        )

    def _parse_cte(self) -> t.Optional[exp.CTE]:
        index = self._index

        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE:
            self._retreat(index)
            return None

        comments = self._prev_comments

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        cte = self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
            comments=comments,
        )

        if isinstance(cte.this, exp.Values):
            cte.set("this", exp.select("*").from_(exp.alias_(cte.this, "_values", table=True)))

        return cte

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
        # so this section tries to parse the clause version and if it fails, it treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return None

        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)

        # We bubble up comments from the Identifier to the TableAlias
        if isinstance(alias, exp.Identifier):
            table_alias.add_comments(alias.pop_comments())

        return table_alias

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
            sample=self._parse_table_sample(),
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, self.MODIFIABLES):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]:
        start = self._curr
        while self._curr:
            self._advance()

        end = self._tokens[self._index - 1]
        return exp.Hint(expressions=[self._find_sql(start, end)])

    def _parse_hint_function_call(self) -> t.Optional[exp.Expression]:
        return self._parse_function_call()

    def _parse_hint_body(self) -> t.Optional[exp.Hint]:
        start_index = self._index
        should_fallback_to_string = False

        hints = []
        try:
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_hint_function_call() or self._parse_var(upper=True),
                ),
                [],
            ):
                hints.extend(hint)
        except ParseError:
            should_fallback_to_string = True

        if should_fallback_to_string or self._curr:
            self._retreat(start_index)
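            # A hint that fails structured parsing degrades to a single opaque
            # string hint (see _parse_hint_fallback_to_string) instead of raising.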
            return self._parse_hint_fallback_to_string()

        return self.expression(exp.Hint, expressions=hints)

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT) and self._prev_comments:
            return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()
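        # Assemble all clauses parsed above; a trailing table alias, if present,
        # is parsed last and attached to the MatchRecognize node itself.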
        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        ordinality: t.Optional[bool] = None

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
            ordinality=ordinality,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_using_identifiers(self) -> t.List[exp.Expression]:
        def _parse_column_as_identifier() -> t.Optional[exp.Expression]:
            this = self._parse_column()
            if isinstance(this, exp.Column):
                return this.this
            return this

        return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            table = self._try_parse(self._parse_table)
            if table:
                return self.expression(exp.Join, this=table)
            return None

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None
        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}
        if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA):
            kwargs["expressions"] = self._parse_csv(
                lambda: self._parse_table(parse_bracket=parse_bracket)
            )

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        elif (
            not (outer_apply or cross_apply)
            and not isinstance(kwargs["this"], exp.Unnest)
            and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY))
        ):
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_assignment()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_using_identifiers()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        kwargs["pivots"] = self._parse_pivots()

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        this = self._parse_assignment()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        on = self._parse_field() if self._match(TokenType.ON) else None

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
            on=on,
        )

    def _parse_index(
        self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
    ) -> t.Optional[exp.Index]:
        if index or anonymous:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None
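            # The UNIQUE / PRIMARY / AMP prefixes (AMP is Teradata-specific) were
            # consumed above; an optional index name follows the INDEX keyword.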
    def _parse_index(
        self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
    ) -> t.Optional[exp.Index]:
        if index or anonymous:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_set((TokenType.INDEX, TokenType.KEY))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        table = self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
        )

        changes = self._parse_changes()
        if changes:
            table.set("changes", changes)

        at_before = self._parse_historical_data()
        if at_before:
            table.set("when", at_before)

        pivots = self._parse_pivots()
        if pivots:
            table.set("pivots", pivots)

        return table
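    # Example (illustrative): _parse_table_parts above maps dotted names onto
    # catalog/db/table, e.g.
    #
    #   >>> from sqlglot import exp, parse_one
    #   >>> parse_one("SELECT * FROM c.d.t").find(exp.Table).catalog
    #   'c'
    #
    # The T-SQL form `FROM a..b` yields an empty string for the db part.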
    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv(
            self._parse_table
        )
        rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or rows_from
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this
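    # Example (illustrative): _parse_version below handles time-travel clauses,
    # dialect permitting, e.g.
    #
    #   SELECT * FROM t FOR TIMESTAMP AS OF x         -- this="TIMESTAMP", kind="AS OF"
    #   SELECT * FROM t FOR VERSION BETWEEN 1 AND 5   -- this="VERSION", Tuple(1, 5)
    #
    # The FOR TIMESTAMP / FOR VERSION prefixes arrive as the TIMESTAMP_SNAPSHOT and
    # VERSION_SNAPSHOT tokens.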
    def _parse_version(self) -> t.Optional[exp.Version]:
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]:
        # https://docs.snowflake.com/en/sql-reference/constructs/at-before
        index = self._index
        historical_data = None
        if self._match_texts(self.HISTORICAL_DATA_PREFIX):
            this = self._prev.text.upper()
            kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.HISTORICAL_DATA_KIND)
                and self._prev.text.upper()
            )
            expression = self._match(TokenType.FARROW) and self._parse_bitwise()

            if expression:
                self._match_r_paren()
                historical_data = self.expression(
                    exp.HistoricalData, this=this, kind=kind, expression=expression
                )
            else:
                self._retreat(index)

        return historical_data

    def _parse_changes(self) -> t.Optional[exp.Changes]:
        if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"):
            return None

        information = self._parse_var(any_token=True)
        self._match_r_paren()

        return self.expression(
            exp.Changes,
            information=information,
            at_before=self._parse_historical_data(),
            end=self._parse_historical_data(),
        )

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not (
            # ClickHouse's `FORMAT Values` is equivalent to `VALUES`
            self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES")
        ):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )
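    # Example (illustrative): _parse_table_sample below accepts both the ANSI-ish
    # and the Hive bucket forms, e.g.
    #
    #   SELECT * FROM t TABLESAMPLE (10 PERCENT)
    #   SELECT * FROM t TABLESAMPLE (BUCKET 1 OUT OF 4 ON x)    -- Hive
    #
    # DuckDB's `USING SAMPLE` reaches it with as_modifier=True.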
    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        if not method and self.DEFAULT_SAMPLING_METHOD:
            method = exp.var(self.DEFAULT_SAMPLING_METHOD)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        return iter(self._parse_join, None)

    def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]:
        if not self._match(TokenType.INTO):
            return None

        return self.expression(
            exp.UnpivotColumns,
            this=self._match_text_seq("NAME") and self._parse_column(),
            expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column),
        )
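    # Example (illustrative): the simplified DuckDB syntax parsed below has no
    # parenthesized aggregation list, unlike the ANSI PIVOT handled by _parse_pivot:
    #
    #   PIVOT cities ON year USING sum(population) GROUP BY country
    #   UNPIVOT t ON (col1, col2, col3) AS row_val INTO NAME m VALUE v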
    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match(TokenType.IN):
                # PIVOT ... ON col IN (row_val1, row_val2)
                return self._parse_in(this)
            if self._match(TokenType.ALIAS, advance=False):
                # UNPIVOT ... ON (col1, col2, col3) AS row_val
                return self._parse_alias(this)

            return this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        into = self._parse_unpivot_columns()
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()

        return self.expression(
            exp.Pivot,
            this=this,
            expressions=expressions,
            using=using,
            group=group,
            unpivot=is_unpivot,
            into=into,
        )

    def _parse_pivot_in(self) -> exp.In:
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_select_or_expression()

            self._match(TokenType.ALIAS)
            alias = self._parse_bitwise()
            if alias:
                if isinstance(alias, exp.Column) and not alias.db:
                    alias = alias.this
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        if self._match(TokenType.ANY):
            exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order()))
        else:
            exprs = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=exprs)
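    # Example (illustrative): _parse_pivot below handles the ANSI form, e.g.
    #
    #   SELECT * FROM t PIVOT (SUM(sales) AS total FOR year IN (2000, 2010))
    #
    # For each FOR field it also precomputes the implied output column names; see
    # the itertools.product() logic at the end of the method.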
    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        fields = []
        while True:
            field = self._try_parse(self._parse_pivot_in)
            if not field:
                break
            fields.append(field)

        default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
            self._parse_bitwise
        )

        group = self._parse_group()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            fields=fields,
            unpivot=unpivot,
            include_nulls=include_nulls,
            default_on_null=default_on_null,
            group=group,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            all_fields = []
            for pivot_field in pivot.fields:
                pivot_field_expressions = pivot_field.expressions

                # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case.
                if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny):
                    continue

                all_fields.append(
                    [
                        fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                        for fld in pivot_field_expressions
                    ]
                )

            if all_fields:
                if names:
                    all_fields.append(names)

                # Generate all possible combinations of the pivot columns
                # e.g. PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US'))
                # generates the product between [[2000, 2010], ['NL', 'US'], ['total']]
                for fld_parts_tuple in itertools.product(*all_fields):
                    fld_parts = list(fld_parts_tuple)

                    if names and self.PREFIXED_PIVOT_COLUMNS:
                        # Move the "name" to the front of the list
                        fld_parts.insert(0, fld_parts.pop(-1))

                    columns.append(exp.to_identifier("_".join(fld_parts)))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations if agg.alias]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        while True:
            index = self._index

            elements["expressions"].extend(
                self._parse_csv(
                    lambda: None
                    if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False)
                    else self._parse_assignment()
                )
            )

            before_with_index = self._index
            with_prefix = self._match(TokenType.WITH)

            if self._match(TokenType.ROLLUP):
                elements["rollup"].append(
                    self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix)
                )
            elif self._match(TokenType.CUBE):
                elements["cube"].append(
                    self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix)
                )
            elif self._match(TokenType.GROUPING_SETS):
                elements["grouping_sets"].append(
                    self.expression(
                        exp.GroupingSets,
                        expressions=self._parse_wrapped_csv(self._parse_grouping_set),
                    )
                )
            elif self._match_text_seq("TOTALS"):
                elements["totals"] = True  # type: ignore

            if before_with_index <= self._index <= before_with_index + 1:
                self._retreat(before_with_index)
                break

            if index == self._index:
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E:
        return self.expression(
            kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column)
        )
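    # Example (illustrative): _parse_group above folds all of these into one
    # exp.Group node:
    #
    #   GROUP BY a, b                          -- "expressions"
    #   GROUP BY ALL                           -- "all" = True
    #   GROUP BY a WITH ROLLUP                 -- "rollup" (with_prefix form)
    #   GROUP BY GROUPING SETS ((a), (a, b))   -- "grouping_sets"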
    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_assignment())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_assignment())

    def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]:
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_assignment()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")
        return connect

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_assignment()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        connect = self._parse_connect_with_prior()

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_assignment()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> t.Optional[exp.Expression]:
        this = self._parse_id_var(any_token=True)
        if self._match(TokenType.ALIAS):
            this = self.expression(exp.Alias, alias=this, this=self._parse_assignment())
        return this

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))
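    # Illustrative note: _parse_ordered below also normalizes NULL ordering. Under
    # the default "nulls_are_small" setting, an ascending `ORDER BY x` behaves like
    # `ORDER BY x NULLS FIRST`, so nulls_first is set to True unless NULLS
    # FIRST/LAST was written explicitly.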
    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        this = parse_method() if parse_method else self._parse_assignment()
        if not this:
            return None

        if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL:
            this = exp.var("ALL")

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                    "interpolate": self._parse_interpolate(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit_options(self) -> exp.LimitOptions:
        percent = self._match(TokenType.PERCENT)
        rows = self._match_set((TokenType.ROW, TokenType.ROWS))
        self._match_text_seq("ONLY")
        with_ties = self._match_text_seq("WITH", "TIES")
        return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties)

    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()

                limit_options = self._parse_limit_options()
            else:
                limit_options = None
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                limit_options=limit_options,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                limit_options=self._parse_limit_options(),
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _can_parse_limit_or_offset(self) -> bool:
        if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False):
            return False

        index = self._index
        result = bool(
            self._try_parse(self._parse_limit, retreat=True)
            or self._try_parse(self._parse_offset, retreat=True)
        )
        self._retreat(index)
        return result

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)
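    # Example (illustrative): _parse_locks below collects trailing row-lock
    # clauses, e.g.
    #
    #   SELECT * FROM t FOR UPDATE OF t SKIP LOCKED   -- update=True, wait=False
    #   SELECT * FROM t LOCK IN SHARE MODE            -- update=False (MySQL)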
self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4665 "LOCK", "IN", "SHARE", "MODE" 4666 ): 4667 update = False 4668 else: 4669 break 4670 4671 expressions = None 4672 if self._match_text_seq("OF"): 4673 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4674 4675 wait: t.Optional[bool | exp.Expression] = None 4676 if self._match_text_seq("NOWAIT"): 4677 wait = True 4678 elif self._match_text_seq("WAIT"): 4679 wait = self._parse_primary() 4680 elif self._match_text_seq("SKIP", "LOCKED"): 4681 wait = False 4682 4683 locks.append( 4684 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4685 ) 4686 4687 return locks 4688 4689 def parse_set_operation(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4690 start = self._index 4691 _, side_token, kind_token = self._parse_join_parts() 4692 4693 side = side_token.text if side_token else None 4694 kind = kind_token.text if kind_token else None 4695 4696 if not self._match_set(self.SET_OPERATIONS): 4697 self._retreat(start) 4698 return None 4699 4700 token_type = self._prev.token_type 4701 4702 if token_type == TokenType.UNION: 4703 operation: t.Type[exp.SetOperation] = exp.Union 4704 elif token_type == TokenType.EXCEPT: 4705 operation = exp.Except 4706 else: 4707 operation = exp.Intersect 4708 4709 comments = self._prev.comments 4710 4711 if self._match(TokenType.DISTINCT): 4712 distinct: t.Optional[bool] = True 4713 elif self._match(TokenType.ALL): 4714 distinct = False 4715 else: 4716 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4717 if distinct is None: 4718 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4719 4720 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4721 "STRICT", "CORRESPONDING" 4722 ) 4723 if self._match_text_seq("CORRESPONDING"): 4724 by_name = True 4725 if not side and not kind: 4726 kind = "INNER" 4727 4728 on_column_list = None 4729 if by_name and self._match_texts(("ON", "BY")): 4730 on_column_list = self._parse_wrapped_csv(self._parse_column) 4731 4732 expression = self._parse_select(nested=True, parse_set_operation=False) 4733 4734 return self.expression( 4735 operation, 4736 comments=comments, 4737 this=this, 4738 distinct=distinct, 4739 by_name=by_name, 4740 expression=expression, 4741 side=side, 4742 kind=kind, 4743 on=on_column_list, 4744 ) 4745 4746 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4747 while this: 4748 setop = self.parse_set_operation(this) 4749 if not setop: 4750 break 4751 this = setop 4752 4753 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4754 expression = this.expression 4755 4756 if expression: 4757 for arg in self.SET_OP_MODIFIERS: 4758 expr = expression.args.get(arg) 4759 if expr: 4760 this.set(arg, expr.pop()) 4761 4762 return this 4763 4764 def _parse_expression(self) -> t.Optional[exp.Expression]: 4765 return self._parse_alias(self._parse_assignment()) 4766 4767 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4768 this = self._parse_disjunction() 4769 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4770 # This allows us to parse <non-identifier token> := <expr> 4771 this = exp.column( 4772 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4773 ) 4774 4775 while self._match_set(self.ASSIGNMENT): 4776 if isinstance(this, exp.Column) and len(this.parts) == 1: 4777 this = this.this 4778 4779 this = self.expression( 4780 
    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        this = self._parse_disjunction()
        if not this and self._next and self._next.token_type in self.ASSIGNMENT:
            # This allows us to parse <non-identifier token> := <expr>
            this = exp.column(
                t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text)
            )

        while self._match_set(self.ASSIGNMENT):
            if isinstance(this, exp.Column) and len(this.parts) == 1:
                this = this.this

            this = self.expression(
                self.ASSIGNMENT[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_assignment(),
            )

        return this

    def _parse_disjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION)

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self._negate_range(this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not this:
            return this

        return self.expression(exp.Not, this=this)

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        if self._match(TokenType.JSON):
            kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper()

            if self._match_text_seq("WITH"):
                _with = True
            elif self._match_text_seq("WITHOUT"):
                _with = False
            else:
                _with = None

            unique = self._match(TokenType.UNIQUE)
            self._match_text_seq("KEYS")
            expression: t.Optional[exp.Expression] = self.expression(
                exp.JSON, **{"this": kind, "with": _with, "unique": unique}
            )
        else:
            expression = self._parse_primary() or self._parse_null()
            if not expression:
                self._retreat(index)
                return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this
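    # Example (illustrative): _parse_in below covers the common membership forms:
    #
    #   x IN (1, 2, 3)           -- "expressions"
    #   x IN (SELECT y FROM t)   -- "query" (single subquery operand)
    #   x IN UNNEST(arr)         -- "unnest" (BigQuery)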
    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_column())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())
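    # Example (illustrative): _parse_interval below canonicalizes intervals to the
    # quoted single-unit form so they transpile cleanly, e.g.
    #
    #   INTERVAL 5 DAY     -> INTERVAL '5' DAY
    #   INTERVAL '5 days'  -> INTERVAL '5' DAYS
    #
    # and INTERVAL '1' DAY '2' HOUR is parsed as a sum (exp.Add) of intervals.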
    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.to_py())
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if parts and unit:
                # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                unit = None
                self._retreat(self._index - 1)

            if len(parts) == 1:
                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var, this=parts[0][1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        interval = self.expression(exp.Interval, this=this, unit=unit)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(
                    exp.Coalesce, this=this, expressions=ensure_list(self._parse_term())
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        this = self._parse_factor()

        while self._match_set(self.TERM):
            klass = self.TERM[self._prev.token_type]
            comments = self._prev_comments
            expression = self._parse_factor()

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Collate):
                expr = this.expression

                # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise
                # fallback to Identifier / Var
                if isinstance(expr, exp.Column) and len(expr.parts) == 1:
                    ident = expr.this
                    if isinstance(ident, exp.Identifier):
                        this.set("expression", ident if ident.quoted else exp.var(ident.name))

        return this

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Div):
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())
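    # Illustrative note: _parse_type below has to disambiguate types from columns:
    # `DATE` may start the typed literal DATE '2024-01-01' (parsed as a cast) or
    # simply be a column named "date". It parses optimistically and retreats when
    # the trailing tokens don't fit a type.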
    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                this = self._parse_column_ops(this)

                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

            self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )
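    # Example (illustrative): _parse_types below is the workhorse for type syntax,
    # covering parameterized, nested, and dialect-specific forms such as
    #
    #   DECIMAL(38, 0)
    #   STRUCT<a INT, b STRING>   -- BigQuery
    #   MAP[TEXT => INT]          -- Materialize
    #   INT[3]                    -- Postgres fixed-size array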
    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier, exp.Identifier):
                tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect))

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    this = exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        # https://materialize.com/docs/sql/types/map/
        if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
            key_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.FARROW):
                self._retreat(index)
                return None

            value_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.R_BRACKET):
                self._retreat(index)
                return None

            return exp.DataType(
                this=exp.DataType.Type.MAP,
                expressions=[key_type, value_type],
                nested=True,
                prefix=prefix,
            )

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                if type_token == TokenType.NULLABLE and len(expressions) == 1:
                    this = expressions[0]
                    this.set("nullable", True)
                    self._match_r_paren()
                    return this
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR, TokenType.ANY)
                )
                if not func_or_ident:
                    return None
                expressions = [func_or_ident]
                if self._match(TokenType.COMMA):
                    expressions.extend(
                        self._parse_csv(
                            lambda: self._parse_types(
                                check_func=check_func,
                                schema=schema,
                                allow_identifiers=allow_identifiers,
                            )
                        )
                    )
            else:
                expressions = self._parse_csv(self._parse_type_size)

                # https://docs.snowflake.com/en/sql-reference/data-types-vector
                if type_token == TokenType.VECTOR and len(expressions) == 2:
                    expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_assignment)
                if not values and is_struct:
                    values = None
                    self._retreat(self._index - 1)
                else:
                    self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))
"ZONE"): 5249 maybe_func = False 5250 tz_type = ( 5251 exp.DataType.Type.TIMETZ 5252 if type_token in self.TIMES 5253 else exp.DataType.Type.TIMESTAMPTZ 5254 ) 5255 this = exp.DataType(this=tz_type, expressions=expressions) 5256 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5257 maybe_func = False 5258 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5259 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5260 maybe_func = False 5261 elif type_token == TokenType.INTERVAL: 5262 unit = self._parse_var(upper=True) 5263 if unit: 5264 if self._match_text_seq("TO"): 5265 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5266 5267 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5268 else: 5269 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5270 elif type_token == TokenType.VOID: 5271 this = exp.DataType(this=exp.DataType.Type.NULL) 5272 5273 if maybe_func and check_func: 5274 index2 = self._index 5275 peek = self._parse_string() 5276 5277 if not peek: 5278 self._retreat(index) 5279 return None 5280 5281 self._retreat(index2) 5282 5283 if not this: 5284 if self._match_text_seq("UNSIGNED"): 5285 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5286 if not unsigned_type_token: 5287 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5288 5289 type_token = unsigned_type_token or type_token 5290 5291 this = exp.DataType( 5292 this=exp.DataType.Type[type_token.value], 5293 expressions=expressions, 5294 nested=nested, 5295 prefix=prefix, 5296 ) 5297 5298 # Empty arrays/structs are allowed 5299 if values is not None: 5300 cls = exp.Struct if is_struct else exp.Array 5301 this = exp.cast(cls(expressions=values), this, copy=False) 5302 5303 elif expressions: 5304 this.set("expressions", expressions) 5305 5306 # https://materialize.com/docs/sql/types/list/#type-name 5307 while self._match(TokenType.LIST): 5308 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5309 5310 index = self._index 5311 5312 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5313 matched_array = self._match(TokenType.ARRAY) 5314 5315 while self._curr: 5316 datatype_token = self._prev.token_type 5317 matched_l_bracket = self._match(TokenType.L_BRACKET) 5318 5319 if (not matched_l_bracket and not matched_array) or ( 5320 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5321 ): 5322 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5323 # not to be confused with the fixed size array parsing 5324 break 5325 5326 matched_array = False 5327 values = self._parse_csv(self._parse_assignment) or None 5328 if ( 5329 values 5330 and not schema 5331 and ( 5332 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5333 ) 5334 ): 5335 # Retreating here means that we should not parse the following values as part of the data type, e.g. 
    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if (
            self._curr
            and self._next
            and self._curr.token_type in self.TYPE_TOKENS
            and self._next.token_type in self.TYPE_TOKENS
        ):
            # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a
            # type token. Without this, the list will be parsed as a type and we'll eventually crash
            this = self._parse_id_var()
        else:
            this = (
                self._parse_type(parse_interval=False, fallback_to_identifier=True)
                or self._parse_id_var()
            )

        self._match(TokenType.COLON)

        if (
            type_required
            and not isinstance(this, exp.DataType)
            and not self._match_set(self.TYPE_TOKENS, advance=False)
        ):
            self._retreat(index)
            return self._parse_types()

        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_column_reference()
        column = self._parse_column_ops(this) if this else self._parse_bracket(this)

        if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column:
            column.set("join_mark", self._match(TokenType.JOIN_MARKER))

        return column

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            this = self._parse_id_var()

        if isinstance(this, exp.Identifier):
            # We bubble up comments from the Identifier to the Column
            this = self.expression(exp.Column, comments=this.pop_comments(), this=this)

        return this
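    # Example (illustrative): _parse_colon_as_variant_extract below rewrites
    # Snowflake/Databricks VARIANT access into an exp.JSONExtract, e.g. col:a.b::INT
    # becomes roughly CAST(GET_PATH(col, 'a.b') AS INT) when generated for
    # Snowflake. The trailing casts are peeled off first so the JSON path itself is
    # never cast.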
    def _parse_colon_as_variant_extract(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        casts = []
        json_path = []
        escape = None

        while self._match(TokenType.COLON):
            start_index = self._index

            # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True
            path = self._parse_column_ops(
                self._parse_field(any_token=True, tokens=(TokenType.SELECT,))
            )

            # The cast :: operator has a lower precedence than the extraction operator :, so
            # we rearrange the AST appropriately to avoid casting the JSON path
            while isinstance(path, exp.Cast):
                casts.append(path.to)
                path = path.this

            if casts:
                dcolon_offset = next(
                    i
                    for i, t in enumerate(self._tokens[start_index:])
                    if t.token_type == TokenType.DCOLON
                )
                end_token = self._tokens[start_index + dcolon_offset - 1]
            else:
                end_token = self._prev

            if path:
                # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as
                # it'll roundtrip to a string literal in GET_PATH
                if isinstance(path, exp.Identifier) and path.quoted:
                    escape = True

                json_path.append(self._find_sql(self._tokens[start_index], end_token))

        # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses
        # the json_path in GET_PATH() while Databricks transforms it back to the colon/dot notation
        if json_path:
            json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path)))

            if json_path_expr:
                json_path_expr.set("escape", escape)

            this = self.expression(
                exp.JSONExtract,
                this=this,
                expression=json_path_expr,
                variant_extract=True,
            )

            while casts:
                this = self.expression(exp.Cast, this=this, to=casts.pop())

        return this

    def _parse_dcolon(self) -> t.Optional[exp.Expression]:
        return self._parse_types()
    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token in (TokenType.DCOLON, TokenType.DOTCOLON):
                field = self._parse_dcolon()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference() or self._parse_bracket()
                if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False):
                    field = self._parse_column_ops(field)
            else:
                field = self._parse_field(any_token=True, anonymous_func=True)

            if isinstance(field, (exp.Func, exp.Window)) and this:
                # BigQuery & Snowflake allow function calls like x.y.count(...), SAFE.SUBSTR(...) etc.
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = exp.replace_tree(
                    this,
                    lambda n: (
                        self.expression(exp.Dot, this=n.args.get("table"), expression=n.this)
                        if n.table
                        else n.this
                    )
                    if isinstance(n, exp.Column)
                    else n,
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                this = self.expression(
                    exp.Column,
                    comments=this.comments,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            elif isinstance(field, exp.Window):
                # Move the exp.Dot's to the window's function
                window_func = self.expression(exp.Dot, this=this, expression=field.this)
                field.set("this", window_func)
                this = field
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            if field and field.comments:
                t.cast(exp.Expression, this).add_comments(field.pop_comments())

            this = self._parse_bracket(this)

        return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if not this and self._match(TokenType.R_PAREN, advance=False):
                this = self.expression(exp.Tuple)
            elif isinstance(this, exp.UNWRAPPED_QUERIES):
                this = self._parse_subquery(this=this, parse_alias=False)
            elif isinstance(this, exp.Subquery):
                this = self._parse_subquery(
                    this=self._parse_set_operations(this), parse_alias=False
                )
            elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        if anonymous_func:
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
                anonymous=anonymous_func, any_token=any_token
            )
        return field or self._parse_id_var(any_token=any_token, tokens=tokens)
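    # Illustrative note: _parse_function below also accepts the ODBC escape syntax
    # {fn CONCAT('a', 'b')} (supported by Snowflake and MySQL); the braces are
    # consumed and the inner function is parsed normally.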
bool = True, 5604 any_token: bool = False, 5605 ) -> t.Optional[exp.Expression]: 5606 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5607 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5608 fn_syntax = False 5609 if ( 5610 self._match(TokenType.L_BRACE, advance=False) 5611 and self._next 5612 and self._next.text.upper() == "FN" 5613 ): 5614 self._advance(2) 5615 fn_syntax = True 5616 5617 func = self._parse_function_call( 5618 functions=functions, 5619 anonymous=anonymous, 5620 optional_parens=optional_parens, 5621 any_token=any_token, 5622 ) 5623 5624 if fn_syntax: 5625 self._match(TokenType.R_BRACE) 5626 5627 return func 5628 5629 def _parse_function_call( 5630 self, 5631 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5632 anonymous: bool = False, 5633 optional_parens: bool = True, 5634 any_token: bool = False, 5635 ) -> t.Optional[exp.Expression]: 5636 if not self._curr: 5637 return None 5638 5639 comments = self._curr.comments 5640 token = self._curr 5641 token_type = self._curr.token_type 5642 this = self._curr.text 5643 upper = this.upper() 5644 5645 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5646 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5647 self._advance() 5648 return self._parse_window(parser(self)) 5649 5650 if not self._next or self._next.token_type != TokenType.L_PAREN: 5651 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5652 self._advance() 5653 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5654 5655 return None 5656 5657 if any_token: 5658 if token_type in self.RESERVED_TOKENS: 5659 return None 5660 elif token_type not in self.FUNC_TOKENS: 5661 return None 5662 5663 self._advance(2) 5664 5665 parser = self.FUNCTION_PARSERS.get(upper) 5666 if parser and not anonymous: 5667 this = parser(self) 5668 else: 5669 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5670 5671 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5672 this = self.expression( 5673 subquery_predicate, comments=comments, this=self._parse_select() 5674 ) 5675 self._match_r_paren() 5676 return this 5677 5678 if functions is None: 5679 functions = self.FUNCTIONS 5680 5681 function = functions.get(upper) 5682 known_function = function and not anonymous 5683 5684 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5685 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5686 5687 post_func_comments = self._curr and self._curr.comments 5688 if known_function and post_func_comments: 5689 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5690 # call we'll construct it as exp.Anonymous, even if it's "known" 5691 if any( 5692 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5693 for comment in post_func_comments 5694 ): 5695 known_function = False 5696 5697 if alias and known_function: 5698 args = self._kv_to_prop_eq(args) 5699 5700 if known_function: 5701 func_builder = t.cast(t.Callable, function) 5702 5703 if "dialect" in func_builder.__code__.co_varnames: 5704 func = func_builder(args, dialect=self.dialect) 5705 else: 5706 func = func_builder(args) 5707 5708 func = self.validate_expression(func, args) 5709 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5710 func.meta["name"] = this 5711 5712 this = func 5713 else: 5714 if token_type == TokenType.IDENTIFIER: 5715 this = exp.Identifier(this=this, quoted=True).update_positions(token) 5716 5717 this = 
self.expression(exp.Anonymous, this=this, expressions=args) 5718 this = this.update_positions(token) 5719 5720 if isinstance(this, exp.Expression): 5721 this.add_comments(comments) 5722 5723 self._match_r_paren(this) 5724 return self._parse_window(this) 5725 5726 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5727 return expression 5728 5729 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5730 transformed = [] 5731 5732 for index, e in enumerate(expressions): 5733 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5734 if isinstance(e, exp.Alias): 5735 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5736 5737 if not isinstance(e, exp.PropertyEQ): 5738 e = self.expression( 5739 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5740 ) 5741 5742 if isinstance(e.this, exp.Column): 5743 e.this.replace(e.this.this) 5744 else: 5745 e = self._to_prop_eq(e, index) 5746 5747 transformed.append(e) 5748 5749 return transformed 5750 5751 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5752 return self._parse_statement() 5753 5754 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5755 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5756 5757 def _parse_user_defined_function( 5758 self, kind: t.Optional[TokenType] = None 5759 ) -> t.Optional[exp.Expression]: 5760 this = self._parse_table_parts(schema=True) 5761 5762 if not self._match(TokenType.L_PAREN): 5763 return this 5764 5765 expressions = self._parse_csv(self._parse_function_parameter) 5766 self._match_r_paren() 5767 return self.expression( 5768 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5769 ) 5770 5771 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5772 literal = self._parse_primary() 5773 if literal: 5774 return self.expression(exp.Introducer, this=token.text, expression=literal) 5775 5776 return self._identifier_expression(token) 5777 5778 def _parse_session_parameter(self) -> exp.SessionParameter: 5779 kind = None 5780 this = self._parse_id_var() or self._parse_primary() 5781 5782 if this and self._match(TokenType.DOT): 5783 kind = this.name 5784 this = self._parse_var() or self._parse_primary() 5785 5786 return self.expression(exp.SessionParameter, this=this, kind=kind) 5787 5788 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5789 return self._parse_id_var() 5790 5791 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5792 index = self._index 5793 5794 if self._match(TokenType.L_PAREN): 5795 expressions = t.cast( 5796 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5797 ) 5798 5799 if not self._match(TokenType.R_PAREN): 5800 self._retreat(index) 5801 else: 5802 expressions = [self._parse_lambda_arg()] 5803 5804 if self._match_set(self.LAMBDAS): 5805 return self.LAMBDAS[self._prev.token_type](self, expressions) 5806 5807 self._retreat(index) 5808 5809 this: t.Optional[exp.Expression] 5810 5811 if self._match(TokenType.DISTINCT): 5812 this = self.expression( 5813 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5814 ) 5815 else: 5816 this = self._parse_select_or_expression(alias=alias) 5817 5818 return self._parse_limit( 5819 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5820 ) 5821 5822 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> 
t.Optional[exp.Expression]: 5823 index = self._index 5824 if not self._match(TokenType.L_PAREN): 5825 return this 5826 5827 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 5828 # expr can be of both types 5829 if self._match_set(self.SELECT_START_TOKENS): 5830 self._retreat(index) 5831 return this 5832 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5833 self._match_r_paren() 5834 return self.expression(exp.Schema, this=this, expressions=args) 5835 5836 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5837 return self._parse_column_def(self._parse_field(any_token=True)) 5838 5839 def _parse_column_def( 5840 self, this: t.Optional[exp.Expression], computed_column: bool = True 5841 ) -> t.Optional[exp.Expression]: 5842 # column defs are not really columns, they're identifiers 5843 if isinstance(this, exp.Column): 5844 this = this.this 5845 5846 if not computed_column: 5847 self._match(TokenType.ALIAS) 5848 5849 kind = self._parse_types(schema=True) 5850 5851 if self._match_text_seq("FOR", "ORDINALITY"): 5852 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5853 5854 constraints: t.List[exp.Expression] = [] 5855 5856 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5857 ("ALIAS", "MATERIALIZED") 5858 ): 5859 persisted = self._prev.text.upper() == "MATERIALIZED" 5860 constraint_kind = exp.ComputedColumnConstraint( 5861 this=self._parse_assignment(), 5862 persisted=persisted or self._match_text_seq("PERSISTED"), 5863 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5864 ) 5865 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5866 elif ( 5867 kind 5868 and self._match(TokenType.ALIAS, advance=False) 5869 and ( 5870 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 5871 or (self._next and self._next.token_type == TokenType.L_PAREN) 5872 ) 5873 ): 5874 self._advance() 5875 constraints.append( 5876 self.expression( 5877 exp.ColumnConstraint, 5878 kind=exp.TransformColumnConstraint(this=self._parse_disjunction()), 5879 ) 5880 ) 5881 5882 while True: 5883 constraint = self._parse_column_constraint() 5884 if not constraint: 5885 break 5886 constraints.append(constraint) 5887 5888 if not kind and not constraints: 5889 return this 5890 5891 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5892 5893 def _parse_auto_increment( 5894 self, 5895 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5896 start = None 5897 increment = None 5898 5899 if self._match(TokenType.L_PAREN, advance=False): 5900 args = self._parse_wrapped_csv(self._parse_bitwise) 5901 start = seq_get(args, 0) 5902 increment = seq_get(args, 1) 5903 elif self._match_text_seq("START"): 5904 start = self._parse_bitwise() 5905 self._match_text_seq("INCREMENT") 5906 increment = self._parse_bitwise() 5907 5908 if start and increment: 5909 return exp.GeneratedAsIdentityColumnConstraint( 5910 start=start, increment=increment, this=False 5911 ) 5912 5913 return exp.AutoIncrementColumnConstraint() 5914 5915 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5916 if not self._match_text_seq("REFRESH"): 5917 self._retreat(self._index - 1) 5918 return None 5919 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5920 5921 def _parse_compress(self) -> exp.CompressColumnConstraint: 5922 if self._match(TokenType.L_PAREN, advance=False): 5923 return self.expression( 5924 
exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5925 ) 5926 5927 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5928 5929 def _parse_generated_as_identity( 5930 self, 5931 ) -> ( 5932 exp.GeneratedAsIdentityColumnConstraint 5933 | exp.ComputedColumnConstraint 5934 | exp.GeneratedAsRowColumnConstraint 5935 ): 5936 if self._match_text_seq("BY", "DEFAULT"): 5937 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5938 this = self.expression( 5939 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5940 ) 5941 else: 5942 self._match_text_seq("ALWAYS") 5943 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5944 5945 self._match(TokenType.ALIAS) 5946 5947 if self._match_text_seq("ROW"): 5948 start = self._match_text_seq("START") 5949 if not start: 5950 self._match(TokenType.END) 5951 hidden = self._match_text_seq("HIDDEN") 5952 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5953 5954 identity = self._match_text_seq("IDENTITY") 5955 5956 if self._match(TokenType.L_PAREN): 5957 if self._match(TokenType.START_WITH): 5958 this.set("start", self._parse_bitwise()) 5959 if self._match_text_seq("INCREMENT", "BY"): 5960 this.set("increment", self._parse_bitwise()) 5961 if self._match_text_seq("MINVALUE"): 5962 this.set("minvalue", self._parse_bitwise()) 5963 if self._match_text_seq("MAXVALUE"): 5964 this.set("maxvalue", self._parse_bitwise()) 5965 5966 if self._match_text_seq("CYCLE"): 5967 this.set("cycle", True) 5968 elif self._match_text_seq("NO", "CYCLE"): 5969 this.set("cycle", False) 5970 5971 if not identity: 5972 this.set("expression", self._parse_range()) 5973 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5974 args = self._parse_csv(self._parse_bitwise) 5975 this.set("start", seq_get(args, 0)) 5976 this.set("increment", seq_get(args, 1)) 5977 5978 self._match_r_paren() 5979 5980 return this 5981 5982 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5983 self._match_text_seq("LENGTH") 5984 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5985 5986 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5987 if self._match_text_seq("NULL"): 5988 return self.expression(exp.NotNullColumnConstraint) 5989 if self._match_text_seq("CASESPECIFIC"): 5990 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5991 if self._match_text_seq("FOR", "REPLICATION"): 5992 return self.expression(exp.NotForReplicationColumnConstraint) 5993 5994 # Unconsume the `NOT` token 5995 self._retreat(self._index - 1) 5996 return None 5997 5998 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5999 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 6000 6001 procedure_option_follows = ( 6002 self._match(TokenType.WITH, advance=False) 6003 and self._next 6004 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6005 ) 6006 6007 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6008 return self.expression( 6009 exp.ColumnConstraint, 6010 this=this, 6011 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 6012 ) 6013 6014 return this 6015 6016 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6017 if not self._match(TokenType.CONSTRAINT): 6018 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6019 6020 return self.expression( 6021 exp.Constraint, 6022 
this=self._parse_id_var(), 6023 expressions=self._parse_unnamed_constraints(), 6024 ) 6025 6026 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6027 constraints = [] 6028 while True: 6029 constraint = self._parse_unnamed_constraint() or self._parse_function() 6030 if not constraint: 6031 break 6032 constraints.append(constraint) 6033 6034 return constraints 6035 6036 def _parse_unnamed_constraint( 6037 self, constraints: t.Optional[t.Collection[str]] = None 6038 ) -> t.Optional[exp.Expression]: 6039 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6040 constraints or self.CONSTRAINT_PARSERS 6041 ): 6042 return None 6043 6044 constraint = self._prev.text.upper() 6045 if constraint not in self.CONSTRAINT_PARSERS: 6046 self.raise_error(f"No parser found for schema constraint {constraint}.") 6047 6048 return self.CONSTRAINT_PARSERS[constraint](self) 6049 6050 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6051 return self._parse_id_var(any_token=False) 6052 6053 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6054 self._match_text_seq("KEY") 6055 return self.expression( 6056 exp.UniqueColumnConstraint, 6057 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6058 this=self._parse_schema(self._parse_unique_key()), 6059 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6060 on_conflict=self._parse_on_conflict(), 6061 options=self._parse_key_constraint_options(), 6062 ) 6063 6064 def _parse_key_constraint_options(self) -> t.List[str]: 6065 options = [] 6066 while True: 6067 if not self._curr: 6068 break 6069 6070 if self._match(TokenType.ON): 6071 action = None 6072 on = self._advance_any() and self._prev.text 6073 6074 if self._match_text_seq("NO", "ACTION"): 6075 action = "NO ACTION" 6076 elif self._match_text_seq("CASCADE"): 6077 action = "CASCADE" 6078 elif self._match_text_seq("RESTRICT"): 6079 action = "RESTRICT" 6080 elif self._match_pair(TokenType.SET, TokenType.NULL): 6081 action = "SET NULL" 6082 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6083 action = "SET DEFAULT" 6084 else: 6085 self.raise_error("Invalid key constraint") 6086 6087 options.append(f"ON {on} {action}") 6088 else: 6089 var = self._parse_var_from_options( 6090 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6091 ) 6092 if not var: 6093 break 6094 options.append(var.name) 6095 6096 return options 6097 6098 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6099 if match and not self._match(TokenType.REFERENCES): 6100 return None 6101 6102 expressions = None 6103 this = self._parse_table(schema=True) 6104 options = self._parse_key_constraint_options() 6105 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6106 6107 def _parse_foreign_key(self) -> exp.ForeignKey: 6108 expressions = ( 6109 self._parse_wrapped_id_vars() 6110 if not self._match(TokenType.REFERENCES, advance=False) 6111 else None 6112 ) 6113 reference = self._parse_references() 6114 on_options = {} 6115 6116 while self._match(TokenType.ON): 6117 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6118 self.raise_error("Expected DELETE or UPDATE") 6119 6120 kind = self._prev.text.lower() 6121 6122 if self._match_text_seq("NO", "ACTION"): 6123 action = "NO ACTION" 6124 elif self._match(TokenType.SET): 6125 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 6126 action = "SET " + self._prev.text.upper() 6127 else: 6128 self._advance() 6129 action = 
self._prev.text.upper() 6130 6131 on_options[kind] = action 6132 6133 return self.expression( 6134 exp.ForeignKey, 6135 expressions=expressions, 6136 reference=reference, 6137 options=self._parse_key_constraint_options(), 6138 **on_options, # type: ignore 6139 ) 6140 6141 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 6142 return self._parse_ordered() or self._parse_field() 6143 6144 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 6145 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 6146 self._retreat(self._index - 1) 6147 return None 6148 6149 id_vars = self._parse_wrapped_id_vars() 6150 return self.expression( 6151 exp.PeriodForSystemTimeConstraint, 6152 this=seq_get(id_vars, 0), 6153 expression=seq_get(id_vars, 1), 6154 ) 6155 6156 def _parse_primary_key( 6157 self, wrapped_optional: bool = False, in_props: bool = False 6158 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 6159 desc = ( 6160 self._match_set((TokenType.ASC, TokenType.DESC)) 6161 and self._prev.token_type == TokenType.DESC 6162 ) 6163 6164 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 6165 return self.expression( 6166 exp.PrimaryKeyColumnConstraint, 6167 desc=desc, 6168 options=self._parse_key_constraint_options(), 6169 ) 6170 6171 expressions = self._parse_wrapped_csv( 6172 self._parse_primary_key_part, optional=wrapped_optional 6173 ) 6174 options = self._parse_key_constraint_options() 6175 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 6176 6177 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6178 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 6179 6180 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6181 """ 6182 Parses a datetime literal in ODBC escape format. The literal is parsed into the 6183 corresponding expression type; for example `{d'yyyy-mm-dd'}` is parsed into a `Date`, 6184 exactly the same as `DATE('yyyy-mm-dd')`.
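Example (illustrative): `{d '2024-01-01'}` is parsed into `exp.Date`, `{t '23:59:59'}` into `exp.Time` and `{ts '2024-01-01 23:59:59'}` into `exp.Timestamp`, based on the keys registered in `ODBC_DATETIME_LITERALS`.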
6185 6186 Reference: 6187 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6188 """ 6189 self._match(TokenType.VAR) 6190 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6191 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6192 if not self._match(TokenType.R_BRACE): 6193 self.raise_error("Expected }") 6194 return expression 6195 6196 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6197 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6198 return this 6199 6200 bracket_kind = self._prev.token_type 6201 if ( 6202 bracket_kind == TokenType.L_BRACE 6203 and self._curr 6204 and self._curr.token_type == TokenType.VAR 6205 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6206 ): 6207 return self._parse_odbc_datetime_literal() 6208 6209 expressions = self._parse_csv( 6210 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6211 ) 6212 6213 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6214 self.raise_error("Expected ]") 6215 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6216 self.raise_error("Expected }") 6217 6218 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6219 if bracket_kind == TokenType.L_BRACE: 6220 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 6221 elif not this: 6222 this = build_array_constructor( 6223 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6224 ) 6225 else: 6226 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6227 if constructor_type: 6228 return build_array_constructor( 6229 constructor_type, 6230 args=expressions, 6231 bracket_kind=bracket_kind, 6232 dialect=self.dialect, 6233 ) 6234 6235 expressions = apply_index_offset( 6236 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6237 ) 6238 this = self.expression(exp.Bracket, this=this, expressions=expressions) 6239 6240 self._add_comments(this) 6241 return self._parse_bracket(this) 6242 6243 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6244 if self._match(TokenType.COLON): 6245 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6246 return this 6247 6248 def _parse_case(self) -> t.Optional[exp.Expression]: 6249 ifs = [] 6250 default = None 6251 6252 comments = self._prev_comments 6253 expression = self._parse_assignment() 6254 6255 while self._match(TokenType.WHEN): 6256 this = self._parse_assignment() 6257 self._match(TokenType.THEN) 6258 then = self._parse_assignment() 6259 ifs.append(self.expression(exp.If, this=this, true=then)) 6260 6261 if self._match(TokenType.ELSE): 6262 default = self._parse_assignment() 6263 6264 if not self._match(TokenType.END): 6265 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6266 default = exp.column("interval") 6267 else: 6268 self.raise_error("Expected END after CASE", self._prev) 6269 6270 return self.expression( 6271 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6272 ) 6273 6274 def _parse_if(self) -> t.Optional[exp.Expression]: 6275 if self._match(TokenType.L_PAREN): 6276 args = self._parse_csv( 6277 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6278 ) 6279 this = self.validate_expression(exp.If.from_arg_list(args), args) 6280 self._match_r_paren() 6281 
else: 6282 index = self._index - 1 6283 6284 if self.NO_PAREN_IF_COMMANDS and index == 0: 6285 return self._parse_as_command(self._prev) 6286 6287 condition = self._parse_assignment() 6288 6289 if not condition: 6290 self._retreat(index) 6291 return None 6292 6293 self._match(TokenType.THEN) 6294 true = self._parse_assignment() 6295 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6296 self._match(TokenType.END) 6297 this = self.expression(exp.If, this=condition, true=true, false=false) 6298 6299 return this 6300 6301 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6302 if not self._match_text_seq("VALUE", "FOR"): 6303 self._retreat(self._index - 1) 6304 return None 6305 6306 return self.expression( 6307 exp.NextValueFor, 6308 this=self._parse_column(), 6309 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6310 ) 6311 6312 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6313 this = self._parse_function() or self._parse_var_or_string(upper=True) 6314 6315 if self._match(TokenType.FROM): 6316 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6317 6318 if not self._match(TokenType.COMMA): 6319 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6320 6321 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6322 6323 def _parse_gap_fill(self) -> exp.GapFill: 6324 self._match(TokenType.TABLE) 6325 this = self._parse_table() 6326 6327 self._match(TokenType.COMMA) 6328 args = [this, *self._parse_csv(self._parse_lambda)] 6329 6330 gap_fill = exp.GapFill.from_arg_list(args) 6331 return self.validate_expression(gap_fill, args) 6332 6333 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6334 this = self._parse_assignment() 6335 6336 if not self._match(TokenType.ALIAS): 6337 if self._match(TokenType.COMMA): 6338 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6339 6340 self.raise_error("Expected AS after CAST") 6341 6342 fmt = None 6343 to = self._parse_types() 6344 6345 default = self._match(TokenType.DEFAULT) 6346 if default: 6347 default = self._parse_bitwise() 6348 self._match_text_seq("ON", "CONVERSION", "ERROR") 6349 6350 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6351 fmt_string = self._parse_string() 6352 fmt = self._parse_at_time_zone(fmt_string) 6353 6354 if not to: 6355 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6356 if to.this in exp.DataType.TEMPORAL_TYPES: 6357 this = self.expression( 6358 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6359 this=this, 6360 format=exp.Literal.string( 6361 format_time( 6362 fmt_string.this if fmt_string else "", 6363 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6364 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6365 ) 6366 ), 6367 safe=safe, 6368 ) 6369 6370 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6371 this.set("zone", fmt.args["zone"]) 6372 return this 6373 elif not to: 6374 self.raise_error("Expected TYPE after CAST") 6375 elif isinstance(to, exp.Identifier): 6376 to = exp.DataType.build(to.name, udt=True) 6377 elif to.this == exp.DataType.Type.CHAR: 6378 if self._match(TokenType.CHARACTER_SET): 6379 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6380 6381 return self.expression( 6382 exp.Cast if strict else exp.TryCast, 6383 this=this, 6384 to=to, 6385 format=fmt, 6386 safe=safe, 6387 
action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6388 default=default, 6389 ) 6390 6391 def _parse_string_agg(self) -> exp.GroupConcat: 6392 if self._match(TokenType.DISTINCT): 6393 args: t.List[t.Optional[exp.Expression]] = [ 6394 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6395 ] 6396 if self._match(TokenType.COMMA): 6397 args.extend(self._parse_csv(self._parse_assignment)) 6398 else: 6399 args = self._parse_csv(self._parse_assignment) # type: ignore 6400 6401 if self._match_text_seq("ON", "OVERFLOW"): 6402 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6403 if self._match_text_seq("ERROR"): 6404 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6405 else: 6406 self._match_text_seq("TRUNCATE") 6407 on_overflow = self.expression( 6408 exp.OverflowTruncateBehavior, 6409 this=self._parse_string(), 6410 with_count=( 6411 self._match_text_seq("WITH", "COUNT") 6412 or not self._match_text_seq("WITHOUT", "COUNT") 6413 ), 6414 ) 6415 else: 6416 on_overflow = None 6417 6418 index = self._index 6419 if not self._match(TokenType.R_PAREN) and args: 6420 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6421 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6422 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6423 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6424 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6425 6426 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6427 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6428 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
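        # Usage sketch (illustrative, not part of the original source; exact
        # output can vary across sqlglot versions):
        #
        #   import sqlglot
        #
        #   # Because the inline ORDER BY is folded into `this`, a Postgres
        #   # STRING_AGG can be re-emitted as MySQL's GROUP_CONCAT:
        #   sqlglot.transpile(
        #       "SELECT STRING_AGG(x, ',' ORDER BY x) FROM t",
        #       read="postgres",
        #       write="mysql",
        #   )
        #   # e.g. -> ["SELECT GROUP_CONCAT(x ORDER BY x SEPARATOR ',') FROM t"]
        #
        # The WITHIN GROUP handling below continues that canonicalization: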
6429 if not self._match_text_seq("WITHIN", "GROUP"): 6430 self._retreat(index) 6431 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6432 6433 # The corresponding match_r_paren will be called in parse_function (caller) 6434 self._match_l_paren() 6435 6436 return self.expression( 6437 exp.GroupConcat, 6438 this=self._parse_order(this=seq_get(args, 0)), 6439 separator=seq_get(args, 1), 6440 on_overflow=on_overflow, 6441 ) 6442 6443 def _parse_convert( 6444 self, strict: bool, safe: t.Optional[bool] = None 6445 ) -> t.Optional[exp.Expression]: 6446 this = self._parse_bitwise() 6447 6448 if self._match(TokenType.USING): 6449 to: t.Optional[exp.Expression] = self.expression( 6450 exp.CharacterSet, this=self._parse_var() 6451 ) 6452 elif self._match(TokenType.COMMA): 6453 to = self._parse_types() 6454 else: 6455 to = None 6456 6457 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 6458 6459 def _parse_xml_table(self) -> exp.XMLTable: 6460 namespaces = None 6461 passing = None 6462 columns = None 6463 6464 if self._match_text_seq("XMLNAMESPACES", "("): 6465 namespaces = self._parse_xml_namespace() 6466 self._match_text_seq(")", ",") 6467 6468 this = self._parse_string() 6469 6470 if self._match_text_seq("PASSING"): 6471 # The BY VALUE keywords are optional and are provided for semantic clarity 6472 self._match_text_seq("BY", "VALUE") 6473 passing = self._parse_csv(self._parse_column) 6474 6475 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6476 6477 if self._match_text_seq("COLUMNS"): 6478 columns = self._parse_csv(self._parse_field_def) 6479 6480 return self.expression( 6481 exp.XMLTable, 6482 this=this, 6483 namespaces=namespaces, 6484 passing=passing, 6485 columns=columns, 6486 by_ref=by_ref, 6487 ) 6488 6489 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6490 namespaces = [] 6491 6492 while True: 6493 if self._match(TokenType.DEFAULT): 6494 uri = self._parse_string() 6495 else: 6496 uri = self._parse_alias(self._parse_string()) 6497 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6498 if not self._match(TokenType.COMMA): 6499 break 6500 6501 return namespaces 6502 6503 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 6504 """ 6505 There are generally two variants of the DECODE function: 6506 6507 - DECODE(bin, charset) 6508 - DECODE(expression, search, result [, search, result] ... [, default]) 6509 6510 The second variant will always be parsed into a CASE expression. Note that NULL 6511 needs special treatment, since we need to explicitly check for it with `IS NULL`, 6512 instead of relying on pattern matching. 
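        For example (illustrative), `DECODE(x, 1, 'one', 'other')` is parsed as
        `CASE WHEN x = 1 THEN 'one' ELSE 'other' END`.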
6513 """ 6514 args = self._parse_csv(self._parse_assignment) 6515 6516 if len(args) < 3: 6517 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6518 6519 expression, *expressions = args 6520 if not expression: 6521 return None 6522 6523 ifs = [] 6524 for search, result in zip(expressions[::2], expressions[1::2]): 6525 if not search or not result: 6526 return None 6527 6528 if isinstance(search, exp.Literal): 6529 ifs.append( 6530 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 6531 ) 6532 elif isinstance(search, exp.Null): 6533 ifs.append( 6534 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 6535 ) 6536 else: 6537 cond = exp.or_( 6538 exp.EQ(this=expression.copy(), expression=search), 6539 exp.and_( 6540 exp.Is(this=expression.copy(), expression=exp.Null()), 6541 exp.Is(this=search.copy(), expression=exp.Null()), 6542 copy=False, 6543 ), 6544 copy=False, 6545 ) 6546 ifs.append(exp.If(this=cond, true=result)) 6547 6548 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6549 6550 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6551 self._match_text_seq("KEY") 6552 key = self._parse_column() 6553 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6554 self._match_text_seq("VALUE") 6555 value = self._parse_bitwise() 6556 6557 if not key and not value: 6558 return None 6559 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6560 6561 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6562 if not this or not self._match_text_seq("FORMAT", "JSON"): 6563 return this 6564 6565 return self.expression(exp.FormatJson, this=this) 6566 6567 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6568 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6569 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6570 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6571 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6572 else: 6573 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6574 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6575 6576 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6577 6578 if not empty and not error and not null: 6579 return None 6580 6581 return self.expression( 6582 exp.OnCondition, 6583 empty=empty, 6584 error=error, 6585 null=null, 6586 ) 6587 6588 def _parse_on_handling( 6589 self, on: str, *values: str 6590 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6591 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6592 for value in values: 6593 if self._match_text_seq(value, "ON", on): 6594 return f"{value} ON {on}" 6595 6596 index = self._index 6597 if self._match(TokenType.DEFAULT): 6598 default_value = self._parse_bitwise() 6599 if self._match_text_seq("ON", on): 6600 return default_value 6601 6602 self._retreat(index) 6603 6604 return None 6605 6606 @t.overload 6607 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6608 6609 @t.overload 6610 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
6611 6612 def _parse_json_object(self, agg=False): 6613 star = self._parse_star() 6614 expressions = ( 6615 [star] 6616 if star 6617 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6618 ) 6619 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6620 6621 unique_keys = None 6622 if self._match_text_seq("WITH", "UNIQUE"): 6623 unique_keys = True 6624 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6625 unique_keys = False 6626 6627 self._match_text_seq("KEYS") 6628 6629 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6630 self._parse_type() 6631 ) 6632 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6633 6634 return self.expression( 6635 exp.JSONObjectAgg if agg else exp.JSONObject, 6636 expressions=expressions, 6637 null_handling=null_handling, 6638 unique_keys=unique_keys, 6639 return_type=return_type, 6640 encoding=encoding, 6641 ) 6642 6643 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6644 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6645 if not self._match_text_seq("NESTED"): 6646 this = self._parse_id_var() 6647 kind = self._parse_types(allow_identifiers=False) 6648 nested = None 6649 else: 6650 this = None 6651 kind = None 6652 nested = True 6653 6654 path = self._match_text_seq("PATH") and self._parse_string() 6655 nested_schema = nested and self._parse_json_schema() 6656 6657 return self.expression( 6658 exp.JSONColumnDef, 6659 this=this, 6660 kind=kind, 6661 path=path, 6662 nested_schema=nested_schema, 6663 ) 6664 6665 def _parse_json_schema(self) -> exp.JSONSchema: 6666 self._match_text_seq("COLUMNS") 6667 return self.expression( 6668 exp.JSONSchema, 6669 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6670 ) 6671 6672 def _parse_json_table(self) -> exp.JSONTable: 6673 this = self._parse_format_json(self._parse_bitwise()) 6674 path = self._match(TokenType.COMMA) and self._parse_string() 6675 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6676 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6677 schema = self._parse_json_schema() 6678 6679 return exp.JSONTable( 6680 this=this, 6681 schema=schema, 6682 path=path, 6683 error_handling=error_handling, 6684 empty_handling=empty_handling, 6685 ) 6686 6687 def _parse_match_against(self) -> exp.MatchAgainst: 6688 expressions = self._parse_csv(self._parse_column) 6689 6690 self._match_text_seq(")", "AGAINST", "(") 6691 6692 this = self._parse_string() 6693 6694 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6695 modifier = "IN NATURAL LANGUAGE MODE" 6696 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6697 modifier = f"{modifier} WITH QUERY EXPANSION" 6698 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6699 modifier = "IN BOOLEAN MODE" 6700 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6701 modifier = "WITH QUERY EXPANSION" 6702 else: 6703 modifier = None 6704 6705 return self.expression( 6706 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6707 ) 6708 6709 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6710 def _parse_open_json(self) -> exp.OpenJSON: 6711 this = self._parse_bitwise() 6712 path = self._match(TokenType.COMMA) and self._parse_string() 6713 6714 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6715 this = self._parse_field(any_token=True) 6716 kind = self._parse_types() 6717 path = 
self._parse_string() 6718 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6719 6720 return self.expression( 6721 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6722 ) 6723 6724 expressions = None 6725 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6726 self._match_l_paren() 6727 expressions = self._parse_csv(_parse_open_json_column_def) 6728 6729 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6730 6731 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6732 args = self._parse_csv(self._parse_bitwise) 6733 6734 if self._match(TokenType.IN): 6735 return self.expression( 6736 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6737 ) 6738 6739 if haystack_first: 6740 haystack = seq_get(args, 0) 6741 needle = seq_get(args, 1) 6742 else: 6743 haystack = seq_get(args, 1) 6744 needle = seq_get(args, 0) 6745 6746 return self.expression( 6747 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6748 ) 6749 6750 def _parse_predict(self) -> exp.Predict: 6751 self._match_text_seq("MODEL") 6752 this = self._parse_table() 6753 6754 self._match(TokenType.COMMA) 6755 self._match_text_seq("TABLE") 6756 6757 return self.expression( 6758 exp.Predict, 6759 this=this, 6760 expression=self._parse_table(), 6761 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6762 ) 6763 6764 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6765 args = self._parse_csv(self._parse_table) 6766 return exp.JoinHint(this=func_name.upper(), expressions=args) 6767 6768 def _parse_substring(self) -> exp.Substring: 6769 # Postgres supports the form: substring(string [from int] [for int]) 6770 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6771 6772 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6773 6774 if self._match(TokenType.FROM): 6775 args.append(self._parse_bitwise()) 6776 if self._match(TokenType.FOR): 6777 if len(args) == 1: 6778 args.append(exp.Literal.number(1)) 6779 args.append(self._parse_bitwise()) 6780 6781 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6782 6783 def _parse_trim(self) -> exp.Trim: 6784 # https://www.w3resource.com/sql/character-functions/trim.php 6785 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6786 6787 position = None 6788 collation = None 6789 expression = None 6790 6791 if self._match_texts(self.TRIM_TYPES): 6792 position = self._prev.text.upper() 6793 6794 this = self._parse_bitwise() 6795 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6796 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6797 expression = self._parse_bitwise() 6798 6799 if invert_order: 6800 this, expression = expression, this 6801 6802 if self._match(TokenType.COLLATE): 6803 collation = self._parse_bitwise() 6804 6805 return self.expression( 6806 exp.Trim, this=this, position=position, expression=expression, collation=collation 6807 ) 6808 6809 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6810 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6811 6812 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6813 return self._parse_window(self._parse_id_var(), alias=True) 6814 6815 def _parse_respect_or_ignore_nulls( 6816 self, this: t.Optional[exp.Expression] 6817 ) -> t.Optional[exp.Expression]: 6818 if self._match_text_seq("IGNORE", "NULLS"): 
6819 return self.expression(exp.IgnoreNulls, this=this) 6820 if self._match_text_seq("RESPECT", "NULLS"): 6821 return self.expression(exp.RespectNulls, this=this) 6822 return this 6823 6824 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6825 if self._match(TokenType.HAVING): 6826 self._match_texts(("MAX", "MIN")) 6827 max = self._prev.text.upper() != "MIN" 6828 return self.expression( 6829 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6830 ) 6831 6832 return this 6833 6834 def _parse_window( 6835 self, this: t.Optional[exp.Expression], alias: bool = False 6836 ) -> t.Optional[exp.Expression]: 6837 func = this 6838 comments = func.comments if isinstance(func, exp.Expression) else None 6839 6840 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6841 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6842 if self._match_text_seq("WITHIN", "GROUP"): 6843 order = self._parse_wrapped(self._parse_order) 6844 this = self.expression(exp.WithinGroup, this=this, expression=order) 6845 6846 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6847 self._match(TokenType.WHERE) 6848 this = self.expression( 6849 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6850 ) 6851 self._match_r_paren() 6852 6853 # The SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] before OVER; 6854 # some dialects choose to implement it and some do not. 6855 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6856 6857 # There is some code above in _parse_lambda that handles 6858 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6859 6860 # The code below handles 6861 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6862 6863 # Oracle allows both formats 6864 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6865 # and Snowflake chose to do the same for familiarity 6866 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6867 if isinstance(this, exp.AggFunc): 6868 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6869 6870 if ignore_respect and ignore_respect is not this: 6871 ignore_respect.replace(ignore_respect.this) 6872 this = self.expression(ignore_respect.__class__, this=this) 6873 6874 this = self._parse_respect_or_ignore_nulls(this) 6875 6876 # BigQuery allows selecting from a named window, e.g. SELECT ... FROM t WINDOW x AS (PARTITION BY ...)
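        # Usage sketch (illustrative, not part of the original source):
        #
        #   import sqlglot
        #   from sqlglot import exp
        #
        #   # Post-parenthesis IGNORE NULLS is wrapped around the whole call:
        #   q = "SELECT FIRST_VALUE(x) IGNORE NULLS OVER (PARTITION BY y) FROM t"
        #   assert sqlglot.parse_one(q).find(exp.IgnoreNulls) is not None
        #
        #   # A named window is parsed by re-entering this method with alias=True
        #   # (see _parse_named_window), which is the branch right below:
        #   q = "SELECT SUM(x) OVER w FROM t WINDOW w AS (PARTITION BY y)"
        #   assert sqlglot.parse_one(q).find(exp.Window) is not None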
6877 if alias: 6878 over = None 6879 self._match(TokenType.ALIAS) 6880 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6881 return this 6882 else: 6883 over = self._prev.text.upper() 6884 6885 if comments and isinstance(func, exp.Expression): 6886 func.pop_comments() 6887 6888 if not self._match(TokenType.L_PAREN): 6889 return self.expression( 6890 exp.Window, 6891 comments=comments, 6892 this=this, 6893 alias=self._parse_id_var(False), 6894 over=over, 6895 ) 6896 6897 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6898 6899 first = self._match(TokenType.FIRST) 6900 if self._match_text_seq("LAST"): 6901 first = False 6902 6903 partition, order = self._parse_partition_and_order() 6904 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6905 6906 if kind: 6907 self._match(TokenType.BETWEEN) 6908 start = self._parse_window_spec() 6909 self._match(TokenType.AND) 6910 end = self._parse_window_spec() 6911 exclude = ( 6912 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 6913 if self._match_text_seq("EXCLUDE") 6914 else None 6915 ) 6916 6917 spec = self.expression( 6918 exp.WindowSpec, 6919 kind=kind, 6920 start=start["value"], 6921 start_side=start["side"], 6922 end=end["value"], 6923 end_side=end["side"], 6924 exclude=exclude, 6925 ) 6926 else: 6927 spec = None 6928 6929 self._match_r_paren() 6930 6931 window = self.expression( 6932 exp.Window, 6933 comments=comments, 6934 this=this, 6935 partition_by=partition, 6936 order=order, 6937 spec=spec, 6938 alias=window_alias, 6939 over=over, 6940 first=first, 6941 ) 6942 6943 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6944 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6945 return self._parse_window(window, alias=alias) 6946 6947 return window 6948 6949 def _parse_partition_and_order( 6950 self, 6951 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6952 return self._parse_partition_by(), self._parse_order() 6953 6954 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6955 self._match(TokenType.BETWEEN) 6956 6957 return { 6958 "value": ( 6959 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6960 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6961 or self._parse_bitwise() 6962 ), 6963 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6964 } 6965 6966 def _parse_alias( 6967 self, this: t.Optional[exp.Expression], explicit: bool = False 6968 ) -> t.Optional[exp.Expression]: 6969 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 6970 # so this section tries to parse the clause version and if it fails, it treats the token 6971 # as an identifier (alias) 6972 if self._can_parse_limit_or_offset(): 6973 return this 6974 6975 any_token = self._match(TokenType.ALIAS) 6976 comments = self._prev_comments or [] 6977 6978 if explicit and not any_token: 6979 return this 6980 6981 if self._match(TokenType.L_PAREN): 6982 aliases = self.expression( 6983 exp.Aliases, 6984 comments=comments, 6985 this=this, 6986 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6987 ) 6988 self._match_r_paren(aliases) 6989 return aliases 6990 6991 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6992 self.STRING_ALIASES and self._parse_string_as_identifier() 6993 ) 6994 6995 if alias: 6996 comments.extend(alias.pop_comments()) 6997 this = self.expression(exp.Alias, comments=comments, this=this, 
alias=alias) 6998 column = this.this 6999 7000 # Moves the comment next to the alias in `expr /* comment */ AS alias` 7001 if not this.comments and column and column.comments: 7002 this.comments = column.pop_comments() 7003 7004 return this 7005 7006 def _parse_id_var( 7007 self, 7008 any_token: bool = True, 7009 tokens: t.Optional[t.Collection[TokenType]] = None, 7010 ) -> t.Optional[exp.Expression]: 7011 expression = self._parse_identifier() 7012 if not expression and ( 7013 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 7014 ): 7015 quoted = self._prev.token_type == TokenType.STRING 7016 expression = self._identifier_expression(quoted=quoted) 7017 7018 return expression 7019 7020 def _parse_string(self) -> t.Optional[exp.Expression]: 7021 if self._match_set(self.STRING_PARSERS): 7022 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 7023 return self._parse_placeholder() 7024 7025 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 7026 output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 7027 if output: 7028 output.update_positions(self._prev) 7029 return output 7030 7031 def _parse_number(self) -> t.Optional[exp.Expression]: 7032 if self._match_set(self.NUMERIC_PARSERS): 7033 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 7034 return self._parse_placeholder() 7035 7036 def _parse_identifier(self) -> t.Optional[exp.Expression]: 7037 if self._match(TokenType.IDENTIFIER): 7038 return self._identifier_expression(quoted=True) 7039 return self._parse_placeholder() 7040 7041 def _parse_var( 7042 self, 7043 any_token: bool = False, 7044 tokens: t.Optional[t.Collection[TokenType]] = None, 7045 upper: bool = False, 7046 ) -> t.Optional[exp.Expression]: 7047 if ( 7048 (any_token and self._advance_any()) 7049 or self._match(TokenType.VAR) 7050 or (self._match_set(tokens) if tokens else False) 7051 ): 7052 return self.expression( 7053 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7054 ) 7055 return self._parse_placeholder() 7056 7057 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 7058 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7059 self._advance() 7060 return self._prev 7061 return None 7062 7063 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7064 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7065 7066 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7067 return self._parse_primary() or self._parse_var(any_token=True) 7068 7069 def _parse_null(self) -> t.Optional[exp.Expression]: 7070 if self._match_set(self.NULL_TOKENS): 7071 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7072 return self._parse_placeholder() 7073 7074 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7075 if self._match(TokenType.TRUE): 7076 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7077 if self._match(TokenType.FALSE): 7078 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7079 return self._parse_placeholder() 7080 7081 def _parse_star(self) -> t.Optional[exp.Expression]: 7082 if self._match(TokenType.STAR): 7083 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7084 return self._parse_placeholder() 7085 7086 def _parse_parameter(self) -> exp.Parameter: 7087 this = self._parse_identifier() or self._parse_primary_or_var() 7088 return 
        self.expression(exp.Parameter, this=this)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        if not self._match_texts(keywords):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_expression()
        return [expression] if expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_alias(self._parse_assignment(), explicit=True)
            if alias
            else self._parse_assignment()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            return self._parse_csv(
                lambda: self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            )

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)

        if self._match_text_seq("ADD", "COLUMNS"):
            schema = self._parse_schema()
            if schema:
                return [schema]
            return []

        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )

        if self._match_text_seq("SET", "VISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="VISIBLE")
        if self._match_text_seq("SET", "INVISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE")

        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )

    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]:
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.AlterRename, this=self._parse_table(schema=True))

    def _parse_alter_table_set(self) -> exp.AlterSet:
        alter_set = self.expression(exp.AlterSet)

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_assignment))
        else:
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            alter_set.set("expressions", [self._parse_properties()])

        return alter_set

    def _parse_alter(self) -> exp.Alter | exp.Command:
        start = self._prev

        alter_token = self._match_set(self.ALTERABLES) and self._prev
        if not alter_token:
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            not_valid = self._match_text_seq("NOT", "VALID")
            options = self._parse_csv(self._parse_property)

            if not self._curr and actions:
                return self.expression(
                    exp.Alter,
                    this=this,
                    kind=alter_token.text.upper(),
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                    not_valid=not_valid,
                )

        return self._parse_as_command(start)

    def _parse_analyze(self) -> exp.Analyze | exp.Command:
        start = self._prev
        # https://duckdb.org/docs/sql/statements/analyze
        if not self._curr:
            return self.expression(exp.Analyze)

        options = []
        while self._match_texts(self.ANALYZE_STYLES):
            if self._prev.text.upper() == "BUFFER_USAGE_LIMIT":
                options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}")
            else:
                options.append(self._prev.text.upper())

        this: t.Optional[exp.Expression] = None
        inner_expression: t.Optional[exp.Expression] = None

        kind = self._curr and self._curr.text.upper()

        if self._match(TokenType.TABLE) or self._match(TokenType.INDEX):
            this = self._parse_table_parts()
        elif self._match_text_seq("TABLES"):
            if self._match_set((TokenType.FROM, TokenType.IN)):
                kind = f"{kind} {self._prev.text.upper()}"
                this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("DATABASE"):
            this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("CLUSTER"):
            this = self._parse_table()
        # Try matching inner expr keywords before fallback to parse table.
        elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            kind = None
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)
        else:
            # Empty kind https://prestodb.io/docs/current/sql/analyze.html
            kind = None
            this = self._parse_table_parts()

        partition = self._try_parse(self._parse_partition)
        if not partition and self._match_texts(self.PARTITION_KEYWORDS):
            return self._parse_as_command(start)

        # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
        if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq(
            "WITH", "ASYNC", "MODE"
        ):
            mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE"
        else:
            mode = None

        if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)

        properties = self._parse_properties()
        return self.expression(
            exp.Analyze,
            kind=kind,
            this=this,
            mode=mode,
            partition=partition,
            properties=properties,
            expression=inner_expression,
            options=options,
        )

    # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html
    def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics:
        this = None
        kind = self._prev.text.upper()
        option = self._prev.text.upper() if self._match_text_seq("DELTA") else None
        expressions = []

        if not self._match_text_seq("STATISTICS"):
            self.raise_error("Expecting token STATISTICS")

        if self._match_text_seq("NOSCAN"):
            this = "NOSCAN"
        elif self._match(TokenType.FOR):
            if self._match_text_seq("ALL", "COLUMNS"):
                this = "FOR ALL COLUMNS"
            if self._match_texts("COLUMNS"):
                this = "FOR COLUMNS"
                expressions = self._parse_csv(self._parse_column_reference)
        elif self._match_text_seq("SAMPLE"):
            sample = self._parse_number()
            expressions = [
                self.expression(
                    exp.AnalyzeSample,
                    sample=sample,
                    kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None,
                )
            ]

        return self.expression(
            exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions
        )

    # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html
    def _parse_analyze_validate(self) -> exp.AnalyzeValidate:
        kind = None
        this = None
        expression: t.Optional[exp.Expression] = None
        if self._match_text_seq("REF", "UPDATE"):
            kind = "REF"
            this = "UPDATE"
            if self._match_text_seq("SET", "DANGLING", "TO", "NULL"):
                this = "UPDATE SET DANGLING TO NULL"
        elif self._match_text_seq("STRUCTURE"):
            kind = "STRUCTURE"
            if self._match_text_seq("CASCADE", "FAST"):
                this = "CASCADE FAST"
            elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts(
                ("ONLINE", "OFFLINE")
            ):
                this = f"CASCADE COMPLETE {self._prev.text.upper()}"
                expression = self._parse_into()

        return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression)

    def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]:
        this = self._prev.text.upper()
        if self._match_text_seq("COLUMNS"):
            return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}")
        return None

    def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]:
        kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None
        if self._match_text_seq("STATISTICS"):
            return self.expression(exp.AnalyzeDelete, kind=kind)
        return None

    def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]:
        if self._match_text_seq("CHAINED", "ROWS"):
            return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into())
        return None

    # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html
    def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram:
        this = self._prev.text.upper()
        expression: t.Optional[exp.Expression] = None
        expressions = []
        update_options = None

        if self._match_text_seq("HISTOGRAM", "ON"):
            expressions = self._parse_csv(self._parse_column_reference)
            with_expressions = []
            while self._match(TokenType.WITH):
                # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
                if self._match_texts(("SYNC", "ASYNC")):
                    if self._match_text_seq("MODE", advance=False):
                        with_expressions.append(f"{self._prev.text.upper()} MODE")
                        self._advance()
                else:
                    buckets = self._parse_number()
                    if self._match_text_seq("BUCKETS"):
                        with_expressions.append(f"{buckets} BUCKETS")
            if with_expressions:
                expression = self.expression(exp.AnalyzeWith, expressions=with_expressions)

            if self._match_texts(("MANUAL", "AUTO")) and self._match(
                TokenType.UPDATE, advance=False
            ):
                update_options = self._prev.text.upper()
                self._advance()
            elif self._match_text_seq("USING", "DATA"):
                expression = self.expression(exp.UsingData, this=self._parse_string())

        return self.expression(
            exp.AnalyzeHistogram,
            this=this,
            expressions=expressions,
            expression=expression,
            update_options=update_options,
        )

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            whens=self._parse_when_matched(),
            returning=self._parse_returning(),
        )

    def _parse_when_matched(self) -> exp.Whens:
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                this = self._parse_star()
                if this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=exp.var("ROW")
                        if self._match_text_seq("ROW")
                        else self._parse_value(values=False),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = self._parse_var_from_options(self.CONFLICT_ACTIONS)

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return self.expression(exp.Whens, expressions=whens)

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        if isinstance(right, (exp.Column, exp.Identifier)):
            right = exp.var(right.name)

        this = self.expression(exp.EQ, this=left, expression=right)
        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()
                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if (
            self._curr
            and self._curr.token_type != TokenType.STRING
            and self._curr.text.upper() in texts
        ):
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if (
                self._curr
                and self._curr.token_type != TokenType.STRING
                and self._curr.text.upper() == text
            ):
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)

    def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]:
        self._match(TokenType.EQ)
        self._match(TokenType.L_PAREN)

        opts: t.List[t.Optional[exp.Expression]] = []
        option: exp.Expression | None
        while self._curr and not self._match(TokenType.R_PAREN):
            if self._match_text_seq("FORMAT_NAME", "="):
                # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL
                option = self._parse_format_name()
            else:
                option = self._parse_property()

            if option is None:
                self.raise_error("Unable to parse option")
                break

            opts.append(option)

        return opts

    def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
        sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

        options = []
        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
            option = self._parse_var(any_token=True)
            prev = self._prev.text.upper()

            # Different dialects might separate options and values by white space, "=" and "AS"
            self._match(TokenType.EQ)
            self._match(TokenType.ALIAS)

            param = self.expression(exp.CopyParameter, this=option)

            if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
                TokenType.L_PAREN, advance=False
            ):
                # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
                param.set("expressions", self._parse_wrapped_options())
            elif prev == "FILE_FORMAT":
                # T-SQL's external file format case
                param.set("expression", self._parse_field())
            else:
                param.set("expression", self._parse_unquoted_field())

            options.append(param)
            self._match(sep)

        return options

    def _parse_credentials(self) -> t.Optional[exp.Credentials]:
        expr = self.expression(exp.Credentials)

        if self._match_text_seq("STORAGE_INTEGRATION", "="):
            expr.set("storage", self._parse_field())
        if self._match_text_seq("CREDENTIALS"):
            # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
            creds = (
                self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
            )
            expr.set("credentials", creds)
        if self._match_text_seq("ENCRYPTION"):
            expr.set("encryption", self._parse_wrapped_options())
        if self._match_text_seq("IAM_ROLE"):
            expr.set("iam_role", self._parse_field())
        if self._match_text_seq("REGION"):
            expr.set("region", self._parse_field())

        return expr

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_copy(self) -> exp.Copy | exp.Command:
        start = self._prev

        self._match(TokenType.INTO)

        this = (
            self._parse_select(nested=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table(schema=True)
        )

        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_file_location)
        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )

    def _parse_normalize(self) -> exp.Normalize:
        return self.expression(
            exp.Normalize,
            this=self._parse_bitwise(),
            form=self._match(TokenType.COMMA) and self._parse_var(),
        )

    def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor:
        args = self._parse_csv(lambda: self._parse_lambda())

        this = seq_get(args, 0)
        decimals = seq_get(args, 1)

        return expr_type(
            this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var()
        )

    def _parse_star_ops(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COLUMNS", "(", advance=False):
            this = self._parse_function()
            if isinstance(this, exp.Columns):
                this.set("unpack", True)
            return this

        return self.expression(
            exp.Star,
            **{  # type: ignore
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        )

    def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]:
        privilege_parts = []

        # Keep consuming consecutive keywords until comma (end of this privilege) or ON
        # (end of privilege list) or L_PAREN (start of column list) are met
        while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False):
            privilege_parts.append(self._curr.text.upper())
            self._advance()

        this = exp.var(" ".join(privilege_parts))
        expressions = (
            self._parse_wrapped_csv(self._parse_column)
            if self._match(TokenType.L_PAREN, advance=False)
            else None
        )

        return self.expression(exp.GrantPrivilege, this=this, expressions=expressions)

    def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]:
        kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper()
        principal = self._parse_id_var()

        if not principal:
            return None

        return self.expression(exp.GrantPrincipal, this=principal, kind=kind)

    def _parse_grant(self) -> exp.Grant | exp.Command:
        start = self._prev

        privileges = self._parse_csv(self._parse_grant_privilege)

        self._match(TokenType.ON)
        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()

        # Attempt to parse the securable e.g. MySQL allows names
        # such as "foo.*", "*.*" which are not easily parseable yet
        securable = self._try_parse(self._parse_table_parts)

        if not securable or not self._match_text_seq("TO"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        grant_option = self._match_text_seq("WITH", "GRANT", "OPTION")

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Grant,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
        )

    def _parse_overlay(self) -> exp.Overlay:
        return self.expression(
            exp.Overlay,
            **{  # type: ignore
                "this": self._parse_bitwise(),
                "expression": self._match_text_seq("PLACING") and self._parse_bitwise(),
                "from": self._match_text_seq("FROM") and self._parse_bitwise(),
                "for": self._match_text_seq("FOR") and self._parse_bitwise(),
            },
        )

    def _parse_format_name(self) -> exp.Property:
        # Note: Although not specified in the docs, Snowflake does accept a string/identifier
        # for FILE_FORMAT = <format_name>
        return self.expression(
            exp.Property,
            this=exp.var("FORMAT_NAME"),
            value=self._parse_string() or self._parse_table_parts(),
        )

    def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc:
        args: t.List[exp.Expression] = []

        if self._match(TokenType.DISTINCT):
            args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()]))
            self._match(TokenType.COMMA)

        args.extend(self._parse_csv(self._parse_assignment))

        return self.expression(
            expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2)
        )

    def _identifier_expression(
        self, token: t.Optional[Token] = None, **kwargs: t.Any
    ) -> exp.Identifier:
        token = token or self._prev
        expression = self.expression(exp.Identifier, this=token.text, **kwargs)
        expression.update_positions(token)
        return expression
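The statement parsers above are usually reached through the library's high-level entry points rather than called directly. A minimal sketch of how two of them surface through sqlglot.parse_one (the SQL strings here are illustrative, not taken from the source):

import sqlglot
from sqlglot import exp

# MERGE statements are handled by _parse_merge and _parse_when_matched.
merge = sqlglot.parse_one(
    "MERGE INTO t USING s ON t.id = s.id WHEN MATCHED THEN UPDATE SET t.v = s.v"
)
assert isinstance(merge, exp.Merge)

# TRUNCATE TABLE is handled by _parse_truncate_table.
truncate = sqlglot.parse_one("TRUNCATE TABLE t1, t2 RESTART IDENTITY")
assert isinstance(truncate, exp.TruncateTable)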
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
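For example, a Parser can be constructed directly with a non-default error level (a minimal sketch; in typical usage a parser is obtained implicitly through a Dialect):

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

# Accumulate up to 5 errors and raise them together at the end of parsing,
# instead of raising immediately on the first one.
parser = Parser(error_level=ErrorLevel.RAISE, max_errors=5)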
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
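For example (a minimal sketch pairing the Tokenizer with the Parser, both of which are part of this library):

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT 1; SELECT 2"
tokens = Tokenizer().tokenize(sql)
trees = Parser().parse(tokens, sql=sql)
assert len(trees) == 2  # one syntax tree per statement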
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
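For example (a sketch; it assumes exp.Table and exp.Column are registered in EXPRESSION_PARSERS, which holds for the default parser):

from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "db.some_table"
tokens = Tokenizer().tokenize(sql)
# Try exp.Table first; fall back to exp.Column if that parse fails.
trees = Parser().parse_into([exp.Table, exp.Column], tokens, sql=sql)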
    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
Logs or raises any found errors, depending on the chosen error level setting.
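A sketch of how the error level changes behaviour (the invalid query is illustrative; it is assumed to record at least one error):

from sqlglot.errors import ErrorLevel, ParseError
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT foo( FROM bar"  # unbalanced parenthesis
try:
    Parser(error_level=ErrorLevel.RAISE).parse(Tokenizer().tokenize(sql), sql=sql)
except ParseError as e:
    print(e.errors)  # the errors collected before check_errors raised

# With ErrorLevel.WARN, the same errors would be logged instead of raised.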
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
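For example (a minimal sketch using the exp.column and exp.Literal helpers from sqlglot.expressions):

from sqlglot import exp
from sqlglot.parser import Parser

parser = Parser()
# Build a validated `a = 1` node; validation happens via validate_expression.
eq = parser.expression(exp.EQ, this=exp.column("a"), expression=exp.Literal.number(1))
print(eq.sql())  # a = 1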
    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
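For example, with ErrorLevel.IGNORE the mandatory-argument check is skipped entirely (a sketch; exp.EQ normally requires both its `this` and `expression` args):

from sqlglot import exp
from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

parser = Parser(error_level=ErrorLevel.IGNORE)
# Missing the mandatory `expression` arg, but no error is reported.
incomplete = parser.validate_expression(exp.EQ(this=exp.column("a")))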
    def parse_set_operation(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        start = self._index
        _, side_token, kind_token = self._parse_join_parts()

        side = side_token.text if side_token else None
        kind = kind_token.text if kind_token else None

        if not self._match_set(self.SET_OPERATIONS):
            self._retreat(start)
            return None

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            operation: t.Type[exp.SetOperation] = exp.Union
        elif token_type == TokenType.EXCEPT:
            operation = exp.Except
        else:
            operation = exp.Intersect

        comments = self._prev.comments

        if self._match(TokenType.DISTINCT):
            distinct: t.Optional[bool] = True
        elif self._match(TokenType.ALL):
            distinct = False
        else:
            distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
            if distinct is None:
                self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

        by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq(
            "STRICT", "CORRESPONDING"
        )
        if self._match_text_seq("CORRESPONDING"):
            by_name = True
            if not side and not kind:
                kind = "INNER"

        on_column_list = None
        if by_name and self._match_texts(("ON", "BY")):
            on_column_list = self._parse_wrapped_csv(self._parse_column)

        expression = self._parse_select(nested=True, parse_set_operation=False)

        return self.expression(
            operation,
            comments=comments,
            this=this,
            distinct=distinct,
            by_name=by_name,
            expression=expression,
            side=side,
            kind=kind,
            on=on_column_list,
        )
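For example, the distinct flag reflects whether DISTINCT, ALL, or neither followed the set operator (a sketch via the public parse_one helper):

import sqlglot
from sqlglot import exp

union = sqlglot.parse_one("SELECT a FROM x UNION ALL SELECT a FROM y")
assert isinstance(union, exp.Union)
assert union.args["distinct"] is False  # ALL was given explicitly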