sqlglot.parser
from __future__ import annotations

import logging
import typing as t
import itertools
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")
    TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor)

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)
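

# Editorial usage sketch (not part of the module): demonstrates the operand
# wrapping performed by build_mod above. The helper name `_demo_build_mod`
# is ours, purely illustrative.
def _demo_build_mod() -> None:
    # MOD(a + 1, 7) -> the Add operand gets parenthesized so that the
    # generated SQL reads (a + 1) % 7 rather than a + 1 % 7
    a_plus_1 = exp.Add(this=exp.column("a"), expression=exp.Literal.number(1))
    node = build_mod([a_plus_1, exp.Literal.number(7)])
    assert node.sql() == "(a + 1) % 7"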


def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True):
    return exp.Trim(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position="LEADING" if is_left else "TRAILING",
    )


def build_coalesce(
    args: t.List, is_nvl: t.Optional[bool] = None, is_null: t.Optional[bool] = None
) -> exp.Coalesce:
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl, is_null=is_null)


def build_locate_strposition(args: t.List):
    return exp.StrPosition(
        this=seq_get(args, 1),
        substr=seq_get(args, 0),
        position=seq_get(args, 2),
    )
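

# Editorial usage sketch (not part of the module): build_coalesce backs the
# COALESCE/IFNULL/NVL entries in Parser.FUNCTIONS below. `_demo_build_coalesce`
# is an illustrative name of ours.
def _demo_build_coalesce() -> None:
    # The first argument becomes `this`, the rest become `expressions`
    node = build_coalesce([exp.column("a"), exp.column("b")])
    assert node.sql() == "COALESCE(a, b)"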


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "CHAR": lambda args: exp.Chr(expressions=args),
        "CHR": lambda args: exp.Chr(expressions=args),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "STRPOS": exp.StrPosition.from_arg_list,
        "CHARINDEX": lambda args: build_locate_strposition(args),
        "INSTR": exp.StrPosition.from_arg_list,
        "LOCATE": lambda args: build_locate_strposition(args),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }
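
    # Editorial note (sketch, not part of the source): FUNCTIONS maps an
    # upper-cased function name to a builder; dialect parsers extend this dict
    # in their subclasses. For example, the MOD entry routes to build_mod above:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("MOD(a + 1, 7)").sql()
    #     '(a + 1) % 7'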

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.DYNAMIC,
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.UDOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.BLOB,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME2,
        TokenType.DATETIME64,
        TokenType.SMALLDATETIME,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.DECIMAL256,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.POINT,
        TokenType.RING,
        TokenType.LINESTRING,
        TokenType.MULTILINESTRING,
        TokenType.POLYGON,
        TokenType.MULTIPOLYGON,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.VOID,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NOTHING,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        TokenType.DYNAMIC,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
        TokenType.DOUBLE: TokenType.UDOUBLE,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.FILE_FORMAT,
        TokenType.MODEL,
        TokenType.NAMESPACE,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.SINK,
        TokenType.SOURCE,
        TokenType.STAGE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.STREAMLIT,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.ATTACH,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.CURRENT_SCHEMA,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DETACH,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.EXPORT,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.GET,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LIMIT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.PUT,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
    ID_VAR_TOKENS.remove(TokenType.UNION)

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_SCHEMA,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GET,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}
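
    # Editorial note (sketch, not part of the source): the token->node tables
    # from CONJUNCTION down to EXPONENT each represent one precedence tier of a
    # precedence-climbing parse, so FACTOR (e.g. *) binds tighter than TERM
    # (e.g. +), which binds tighter than the comparisons, and so on:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("a + b * c").sql()
    #     'a + b * c'
    #     >>> sqlglot.parse_one("(a + b) * c").sql()
    #     '(a + b) * c'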

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPNTZ,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DOTCOLON: lambda self, this, to: self.expression(
            exp.JSONCast,
            this=this,
            to=to,
        ),
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
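
    # Editorial note (sketch, not part of the source): COLUMN_OPERATORS maps
    # postfix column operators to node builders, e.g. `x::INT` should become an
    # exp.Cast (or exp.TryCast when STRICT_CAST is False) and `x -> '$.a'` an
    # exp.JSONExtract:
    #
    #     >>> import sqlglot
    #     >>> type(sqlglot.parse_one("x -> '$.a'")).__name__
    #     'JSONExtract'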

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Hint: lambda self: self._parse_hint_body(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Tuple: lambda self: self._parse_value(values=False),
        exp.Whens: lambda self: self._parse_when_matched(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }
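
    # Editorial note (sketch, not part of the source): EXPRESSION_PARSERS is the
    # dispatch table behind parse_into (defined further below); passing `into`
    # routes the token stream to one of these entry points:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> isinstance(sqlglot.parse_one("a = 1", into=exp.Condition), exp.EQ)
    #     True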

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.ANALYZE: lambda self: self._parse_analyze(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self._parse_use(),
        TokenType.SEMICOLON: lambda self: exp.Semicolon(),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(
            exp.HexString,
            this=token.text,
            is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None,
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.COLON_PLACEHOLDER_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
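
    # Editorial note (sketch, not part of the source): RANGE_PARSERS attaches
    # postfix range predicates to an already-parsed operand, e.g.:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("x BETWEEN 1 AND 2").sql()
    #     'x BETWEEN 1 AND 2'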

    PIPE_SYNTAX_TRANSFORM_PARSERS = {
        "SELECT": lambda self, query: self._parse_pipe_syntax_select(query),
        "WHERE": lambda self, query: self._parse_pipe_syntax_where(query),
        "ORDER BY": lambda self, query: query.order_by(
            self._parse_order(), append=False, copy=False
        ),
        "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query),
        "OFFSET": lambda self, query: query.offset(self._parse_offset(), copy=False),
        "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query),
    }

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
        "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EMPTY": lambda self: self.expression(exp.EmptyProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "ENVIRONMENT": lambda self: self.expression(
            exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment)
        ),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SECURITY": lambda self: self._parse_security(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WATERMARK": lambda self: self.expression(
            exp.WatermarkColumnConstraint,
            this=self._match(TokenType.FOR) and self._parse_column(),
            expression=self._match(TokenType.ALIAS) and self._parse_disjunction(),
        ),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
        "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
        "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
    }

    def _parse_partitioned_by_bucket_or_truncate(self) -> exp.Expression:
        klass = (
            exp.PartitionedByBucket
            if self._prev.text.upper() == "BUCKET"
            else exp.PartitionByTruncate
        )

        args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column())
        this, expression = seq_get(args, 0), seq_get(args, 1)

        if isinstance(this, exp.Literal):
            # Check for Iceberg partition transforms (bucket / truncate) and ensure their
            # arguments are in the right order:
            #  - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)`
            #  - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)`
            # Both variants are canonicalized to the latter, i.e. `bucket(<col name>, <num buckets>)`
            #
            # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning
            # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
            this, expression = expression, this

        return self.expression(klass, this=this, expression=expression)
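
    # Editorial note (sketch, not part of the source): per the comments above,
    # both argument orders normalize to the Trino-style form, roughly:
    #
    #     bucket(5, c)  -- Hive order, numeric literal first
    #     bucket(c, 5)  -- Trino order
    #     -- both should produce PartitionedByBucket(this=c, expression=5)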
"WATERMARK", 1182 "BUCKET", 1183 "TRUNCATE", 1184 } 1185 1186 NO_PAREN_FUNCTION_PARSERS = { 1187 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1188 "CASE": lambda self: self._parse_case(), 1189 "CONNECT_BY_ROOT": lambda self: self.expression( 1190 exp.ConnectByRoot, this=self._parse_column() 1191 ), 1192 "IF": lambda self: self._parse_if(), 1193 } 1194 1195 INVALID_FUNC_NAME_TOKENS = { 1196 TokenType.IDENTIFIER, 1197 TokenType.STRING, 1198 } 1199 1200 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1201 1202 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1203 1204 FUNCTION_PARSERS = { 1205 **{ 1206 name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names() 1207 }, 1208 **{ 1209 name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names() 1210 }, 1211 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1212 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1213 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1214 "DECODE": lambda self: self._parse_decode(), 1215 "EXTRACT": lambda self: self._parse_extract(), 1216 "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1217 "GAP_FILL": lambda self: self._parse_gap_fill(), 1218 "JSON_OBJECT": lambda self: self._parse_json_object(), 1219 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1220 "JSON_TABLE": lambda self: self._parse_json_table(), 1221 "MATCH": lambda self: self._parse_match_against(), 1222 "NORMALIZE": lambda self: self._parse_normalize(), 1223 "OPENJSON": lambda self: self._parse_open_json(), 1224 "OVERLAY": lambda self: self._parse_overlay(), 1225 "POSITION": lambda self: self._parse_position(), 1226 "PREDICT": lambda self: self._parse_predict(), 1227 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1228 "STRING_AGG": lambda self: self._parse_string_agg(), 1229 "SUBSTRING": lambda self: self._parse_substring(), 1230 "TRIM": lambda self: self._parse_trim(), 1231 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1232 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1233 "XMLELEMENT": lambda self: self.expression( 1234 exp.XMLElement, 1235 this=self._match_text_seq("NAME") and self._parse_id_var(), 1236 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1237 ), 1238 "XMLTABLE": lambda self: self._parse_xml_table(), 1239 } 1240 1241 QUERY_MODIFIER_PARSERS = { 1242 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1243 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1244 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1245 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1246 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1247 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1248 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1249 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1250 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1251 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1252 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1253 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1254 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1255 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1256 TokenType.USING: lambda self: ("sample", 

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

    EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        "USING": (
            "BTREE",
            "HASH",
        ),
        **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()),
    }

    WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = {
        "NO": ("OTHERS",),
        "CURRENT": ("ROW",),
        **dict.fromkeys(("GROUP", "TIES"), tuple()),
    }
{"AT", "BEFORE", "END"} 1371 HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"} 1372 1373 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1374 1375 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1376 1377 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1378 1379 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1380 1381 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1382 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1383 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1384 1385 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1386 1387 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1388 1389 ADD_CONSTRAINT_TOKENS = { 1390 TokenType.CONSTRAINT, 1391 TokenType.FOREIGN_KEY, 1392 TokenType.INDEX, 1393 TokenType.KEY, 1394 TokenType.PRIMARY_KEY, 1395 TokenType.UNIQUE, 1396 } 1397 1398 DISTINCT_TOKENS = {TokenType.DISTINCT} 1399 1400 NULL_TOKENS = {TokenType.NULL} 1401 1402 UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS 1403 1404 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1405 1406 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1407 1408 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1409 1410 ODBC_DATETIME_LITERALS = { 1411 "d": exp.Date, 1412 "t": exp.Time, 1413 "ts": exp.Timestamp, 1414 } 1415 1416 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1417 1418 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1419 1420 # The style options for the DESCRIBE statement 1421 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1422 1423 # The style options for the ANALYZE statement 1424 ANALYZE_STYLES = { 1425 "BUFFER_USAGE_LIMIT", 1426 "FULL", 1427 "LOCAL", 1428 "NO_WRITE_TO_BINLOG", 1429 "SAMPLE", 1430 "SKIP_LOCKED", 1431 "VERBOSE", 1432 } 1433 1434 ANALYZE_EXPRESSION_PARSERS = { 1435 "ALL": lambda self: self._parse_analyze_columns(), 1436 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1437 "DELETE": lambda self: self._parse_analyze_delete(), 1438 "DROP": lambda self: self._parse_analyze_histogram(), 1439 "ESTIMATE": lambda self: self._parse_analyze_statistics(), 1440 "LIST": lambda self: self._parse_analyze_list(), 1441 "PREDICATE": lambda self: self._parse_analyze_columns(), 1442 "UPDATE": lambda self: self._parse_analyze_histogram(), 1443 "VALIDATE": lambda self: self._parse_analyze_validate(), 1444 } 1445 1446 PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"} 1447 1448 AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET) 1449 1450 OPERATION_MODIFIERS: t.Set[str] = set() 1451 1452 RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"} 1453 1454 MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows) 1455 1456 STRICT_CAST = True 1457 1458 PREFIXED_PIVOT_COLUMNS = False 1459 IDENTIFY_PIVOT_STRINGS = False 1460 1461 LOG_DEFAULTS_TO_LN = False 1462 1463 # Whether the table sample clause expects CSV syntax 1464 TABLESAMPLE_CSV = False 1465 1466 # The default method used for table sampling 1467 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1468 1469 # Whether the SET command needs a delimiter (e.g. 
"=") for assignments 1470 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1471 1472 # Whether the TRIM function expects the characters to trim as its first argument 1473 TRIM_PATTERN_FIRST = False 1474 1475 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1476 STRING_ALIASES = False 1477 1478 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1479 MODIFIERS_ATTACHED_TO_SET_OP = True 1480 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1481 1482 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1483 NO_PAREN_IF_COMMANDS = True 1484 1485 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1486 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1487 1488 # Whether the `:` operator is used to extract a value from a VARIANT column 1489 COLON_IS_VARIANT_EXTRACT = False 1490 1491 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1492 # If this is True and '(' is not found, the keyword will be treated as an identifier 1493 VALUES_FOLLOWED_BY_PAREN = True 1494 1495 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1496 SUPPORTS_IMPLICIT_UNNEST = False 1497 1498 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1499 INTERVAL_SPANS = True 1500 1501 # Whether a PARTITION clause can follow a table reference 1502 SUPPORTS_PARTITION_SELECTION = False 1503 1504 # Whether the `name AS expr` schema/column constraint requires parentheses around `expr` 1505 WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True 1506 1507 # Whether the 'AS' keyword is optional in the CTE definition syntax 1508 OPTIONAL_ALIAS_TOKEN_CTE = True 1509 1510 # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword 1511 ALTER_RENAME_REQUIRES_COLUMN = True 1512 1513 __slots__ = ( 1514 "error_level", 1515 "error_message_context", 1516 "max_errors", 1517 "dialect", 1518 "sql", 1519 "errors", 1520 "_tokens", 1521 "_index", 1522 "_curr", 1523 "_next", 1524 "_prev", 1525 "_prev_comments", 1526 "_pipe_cte_counter", 1527 ) 1528 1529 # Autofilled 1530 SHOW_TRIE: t.Dict = {} 1531 SET_TRIE: t.Dict = {} 1532 1533 def __init__( 1534 self, 1535 error_level: t.Optional[ErrorLevel] = None, 1536 error_message_context: int = 100, 1537 max_errors: int = 3, 1538 dialect: DialectType = None, 1539 ): 1540 from sqlglot.dialects import Dialect 1541 1542 self.error_level = error_level or ErrorLevel.IMMEDIATE 1543 self.error_message_context = error_message_context 1544 self.max_errors = max_errors 1545 self.dialect = Dialect.get_or_raise(dialect) 1546 self.reset() 1547 1548 def reset(self): 1549 self.sql = "" 1550 self.errors = [] 1551 self._tokens = [] 1552 self._index = 0 1553 self._curr = None 1554 self._next = None 1555 self._prev = None 1556 self._prev_comments = None 1557 self._pipe_cte_counter = 0 1558 1559 def parse( 1560 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1561 ) -> t.List[t.Optional[exp.Expression]]: 1562 """ 1563 Parses a list of tokens and returns a list of syntax trees, one tree 1564 per parsed SQL statement. 1565 1566 Args: 1567 raw_tokens: The list of tokens. 1568 sql: The original SQL string, used to produce helpful debug messages. 1569 1570 Returns: 1571 The list of the produced syntax trees. 
1572 """ 1573 return self._parse( 1574 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1575 ) 1576 1577 def parse_into( 1578 self, 1579 expression_types: exp.IntoType, 1580 raw_tokens: t.List[Token], 1581 sql: t.Optional[str] = None, 1582 ) -> t.List[t.Optional[exp.Expression]]: 1583 """ 1584 Parses a list of tokens into a given Expression type. If a collection of Expression 1585 types is given instead, this method will try to parse the token list into each one 1586 of them, stopping at the first for which the parsing succeeds. 1587 1588 Args: 1589 expression_types: The expression type(s) to try and parse the token list into. 1590 raw_tokens: The list of tokens. 1591 sql: The original SQL string, used to produce helpful debug messages. 1592 1593 Returns: 1594 The target Expression. 1595 """ 1596 errors = [] 1597 for expression_type in ensure_list(expression_types): 1598 parser = self.EXPRESSION_PARSERS.get(expression_type) 1599 if not parser: 1600 raise TypeError(f"No parser registered for {expression_type}") 1601 1602 try: 1603 return self._parse(parser, raw_tokens, sql) 1604 except ParseError as e: 1605 e.errors[0]["into_expression"] = expression_type 1606 errors.append(e) 1607 1608 raise ParseError( 1609 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1610 errors=merge_errors(errors), 1611 ) from errors[-1] 1612 1613 def _parse( 1614 self, 1615 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1616 raw_tokens: t.List[Token], 1617 sql: t.Optional[str] = None, 1618 ) -> t.List[t.Optional[exp.Expression]]: 1619 self.reset() 1620 self.sql = sql or "" 1621 1622 total = len(raw_tokens) 1623 chunks: t.List[t.List[Token]] = [[]] 1624 1625 for i, token in enumerate(raw_tokens): 1626 if token.token_type == TokenType.SEMICOLON: 1627 if token.comments: 1628 chunks.append([token]) 1629 1630 if i < total - 1: 1631 chunks.append([]) 1632 else: 1633 chunks[-1].append(token) 1634 1635 expressions = [] 1636 1637 for tokens in chunks: 1638 self._index = -1 1639 self._tokens = tokens 1640 self._advance() 1641 1642 expressions.append(parse_method(self)) 1643 1644 if self._index < len(self._tokens): 1645 self.raise_error("Invalid expression / Unexpected token") 1646 1647 self.check_errors() 1648 1649 return expressions 1650 1651 def check_errors(self) -> None: 1652 """Logs or raises any found errors, depending on the chosen error level setting.""" 1653 if self.error_level == ErrorLevel.WARN: 1654 for error in self.errors: 1655 logger.error(str(error)) 1656 elif self.error_level == ErrorLevel.RAISE and self.errors: 1657 raise ParseError( 1658 concat_messages(self.errors, self.max_errors), 1659 errors=merge_errors(self.errors), 1660 ) 1661 1662 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1663 """ 1664 Appends an error in the list of recorded errors or raises it, depending on the chosen 1665 error level setting. 1666 """ 1667 token = token or self._curr or self._prev or Token.string("") 1668 start = token.start 1669 end = token.end + 1 1670 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1671 highlight = self.sql[start:end] 1672 end_context = self.sql[end : end + self.error_message_context] 1673 1674 error = ParseError.new( 1675 f"{message}. 
Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one currently being processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/except internally raises an error.
        This behavior can differ depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            comments = self._prev_comments
            stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
            stmt.add_comments(comments, prepend=True)
            return stmt

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
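
    # A minimal usage sketch of the entry points above: they are normally driven
    # through a Dialect, which supplies the matching tokenizer. Dialect.get_or_raise,
    # Dialect.tokenize, Dialect.parser, Parser.parse and Parser.parse_into are all
    # existing sqlglot APIs; the SQL snippets are illustrative.
    #
    #     from sqlglot import exp
    #     from sqlglot.dialects import Dialect
    #
    #     dialect = Dialect.get_or_raise("duckdb")
    #     parser = dialect.parser()  # error_level defaults to ErrorLevel.IMMEDIATE
    #
    #     sql = "SELECT 1; SELECT 2"
    #     trees = parser.parse(dialect.tokenize(sql), sql=sql)  # one exp.Select per statement
    #
    #     # parse_into tries each candidate expression type until one parses successfully:
    #     cond = parser.parse_into(exp.Condition, dialect.tokenize("1 < 2"), sql="1 < 2")[0]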
1885 1886 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1887 start = self._prev 1888 temporary = self._match(TokenType.TEMPORARY) 1889 materialized = self._match_text_seq("MATERIALIZED") 1890 1891 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1892 if not kind: 1893 return self._parse_as_command(start) 1894 1895 concurrently = self._match_text_seq("CONCURRENTLY") 1896 if_exists = exists or self._parse_exists() 1897 1898 if kind == "COLUMN": 1899 this = self._parse_column() 1900 else: 1901 this = self._parse_table_parts( 1902 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1903 ) 1904 1905 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1906 1907 if self._match(TokenType.L_PAREN, advance=False): 1908 expressions = self._parse_wrapped_csv(self._parse_types) 1909 else: 1910 expressions = None 1911 1912 return self.expression( 1913 exp.Drop, 1914 exists=if_exists, 1915 this=this, 1916 expressions=expressions, 1917 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1918 temporary=temporary, 1919 materialized=materialized, 1920 cascade=self._match_text_seq("CASCADE"), 1921 constraints=self._match_text_seq("CONSTRAINTS"), 1922 purge=self._match_text_seq("PURGE"), 1923 cluster=cluster, 1924 concurrently=concurrently, 1925 ) 1926 1927 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1928 return ( 1929 self._match_text_seq("IF") 1930 and (not not_ or self._match(TokenType.NOT)) 1931 and self._match(TokenType.EXISTS) 1932 ) 1933 1934 def _parse_create(self) -> exp.Create | exp.Command: 1935 # Note: this can't be None because we've matched a statement parser 1936 start = self._prev 1937 1938 replace = ( 1939 start.token_type == TokenType.REPLACE 1940 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1941 or self._match_pair(TokenType.OR, TokenType.ALTER) 1942 ) 1943 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1944 1945 unique = self._match(TokenType.UNIQUE) 1946 1947 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1948 clustered = True 1949 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1950 "COLUMNSTORE" 1951 ): 1952 clustered = False 1953 else: 1954 clustered = None 1955 1956 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1957 self._advance() 1958 1959 properties = None 1960 create_token = self._match_set(self.CREATABLES) and self._prev 1961 1962 if not create_token: 1963 # exp.Properties.Location.POST_CREATE 1964 properties = self._parse_properties() 1965 create_token = self._match_set(self.CREATABLES) and self._prev 1966 1967 if not properties or not create_token: 1968 return self._parse_as_command(start) 1969 1970 concurrently = self._match_text_seq("CONCURRENTLY") 1971 exists = self._parse_exists(not_=True) 1972 this = None 1973 expression: t.Optional[exp.Expression] = None 1974 indexes = None 1975 no_schema_binding = None 1976 begin = None 1977 end = None 1978 clone = None 1979 1980 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1981 nonlocal properties 1982 if properties and temp_props: 1983 properties.expressions.extend(temp_props.expressions) 1984 elif temp_props: 1985 properties = temp_props 1986 1987 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1988 this = self._parse_user_defined_function(kind=create_token.token_type) 1989 1990 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1991 
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_user_defined_function_expression()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            has_alias = self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            # Some dialects also support using a table as an alias instead of a SELECT.
            # Here we fall back to this as an alternative.
2052 if not expression and has_alias: 2053 expression = self._try_parse(self._parse_table_parts) 2054 2055 if create_token.token_type == TokenType.TABLE: 2056 # exp.Properties.Location.POST_EXPRESSION 2057 extend_props(self._parse_properties()) 2058 2059 indexes = [] 2060 while True: 2061 index = self._parse_index() 2062 2063 # exp.Properties.Location.POST_INDEX 2064 extend_props(self._parse_properties()) 2065 if not index: 2066 break 2067 else: 2068 self._match(TokenType.COMMA) 2069 indexes.append(index) 2070 elif create_token.token_type == TokenType.VIEW: 2071 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2072 no_schema_binding = True 2073 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2074 extend_props(self._parse_properties()) 2075 2076 shallow = self._match_text_seq("SHALLOW") 2077 2078 if self._match_texts(self.CLONE_KEYWORDS): 2079 copy = self._prev.text.lower() == "copy" 2080 clone = self.expression( 2081 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2082 ) 2083 2084 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2085 return self._parse_as_command(start) 2086 2087 create_kind_text = create_token.text.upper() 2088 return self.expression( 2089 exp.Create, 2090 this=this, 2091 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2092 replace=replace, 2093 refresh=refresh, 2094 unique=unique, 2095 expression=expression, 2096 exists=exists, 2097 properties=properties, 2098 indexes=indexes, 2099 no_schema_binding=no_schema_binding, 2100 begin=begin, 2101 end=end, 2102 clone=clone, 2103 concurrently=concurrently, 2104 clustered=clustered, 2105 ) 2106 2107 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2108 seq = exp.SequenceProperties() 2109 2110 options = [] 2111 index = self._index 2112 2113 while self._curr: 2114 self._match(TokenType.COMMA) 2115 if self._match_text_seq("INCREMENT"): 2116 self._match_text_seq("BY") 2117 self._match_text_seq("=") 2118 seq.set("increment", self._parse_term()) 2119 elif self._match_text_seq("MINVALUE"): 2120 seq.set("minvalue", self._parse_term()) 2121 elif self._match_text_seq("MAXVALUE"): 2122 seq.set("maxvalue", self._parse_term()) 2123 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2124 self._match_text_seq("=") 2125 seq.set("start", self._parse_term()) 2126 elif self._match_text_seq("CACHE"): 2127 # T-SQL allows empty CACHE which is initialized dynamically 2128 seq.set("cache", self._parse_number() or True) 2129 elif self._match_text_seq("OWNED", "BY"): 2130 # "OWNED BY NONE" is the default 2131 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2132 else: 2133 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2134 if opt: 2135 options.append(opt) 2136 else: 2137 break 2138 2139 seq.set("options", options if options else None) 2140 return None if self._index == index else seq 2141 2142 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2143 # only used for teradata currently 2144 self._match(TokenType.COMMA) 2145 2146 kwargs = { 2147 "no": self._match_text_seq("NO"), 2148 "dual": self._match_text_seq("DUAL"), 2149 "before": self._match_text_seq("BEFORE"), 2150 "default": self._match_text_seq("DEFAULT"), 2151 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2152 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2153 "after": self._match_text_seq("AFTER"), 2154 "minimum": 
self._match_texts(("MIN", "MINIMUM")), 2155 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2156 } 2157 2158 if self._match_texts(self.PROPERTY_PARSERS): 2159 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2160 try: 2161 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2162 except TypeError: 2163 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2164 2165 return None 2166 2167 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2168 return self._parse_wrapped_csv(self._parse_property) 2169 2170 def _parse_property(self) -> t.Optional[exp.Expression]: 2171 if self._match_texts(self.PROPERTY_PARSERS): 2172 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2173 2174 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2175 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2176 2177 if self._match_text_seq("COMPOUND", "SORTKEY"): 2178 return self._parse_sortkey(compound=True) 2179 2180 if self._match_text_seq("SQL", "SECURITY"): 2181 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2182 2183 index = self._index 2184 key = self._parse_column() 2185 2186 if not self._match(TokenType.EQ): 2187 self._retreat(index) 2188 return self._parse_sequence_properties() 2189 2190 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2191 if isinstance(key, exp.Column): 2192 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2193 2194 value = self._parse_bitwise() or self._parse_var(any_token=True) 2195 2196 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2197 if isinstance(value, exp.Column): 2198 value = exp.var(value.name) 2199 2200 return self.expression(exp.Property, this=key, value=value) 2201 2202 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2203 if self._match_text_seq("BY"): 2204 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2205 2206 self._match(TokenType.ALIAS) 2207 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2208 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2209 2210 return self.expression( 2211 exp.FileFormatProperty, 2212 this=( 2213 self.expression( 2214 exp.InputOutputFormat, 2215 input_format=input_format, 2216 output_format=output_format, 2217 ) 2218 if input_format or output_format 2219 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2220 ), 2221 ) 2222 2223 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2224 field = self._parse_field() 2225 if isinstance(field, exp.Identifier) and not field.quoted: 2226 field = exp.var(field) 2227 2228 return field 2229 2230 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2231 self._match(TokenType.EQ) 2232 self._match(TokenType.ALIAS) 2233 2234 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2235 2236 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2237 properties = [] 2238 while True: 2239 if before: 2240 prop = self._parse_property_before() 2241 else: 2242 prop = self._parse_property() 2243 if not prop: 2244 break 2245 for p in ensure_list(prop): 2246 properties.append(p) 2247 2248 if properties: 2249 return self.expression(exp.Properties, expressions=properties) 2250 2251 return None 2252 2253 
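
    # Usage sketch: the property machinery above surfaces as a single exp.Properties
    # node on the parsed DDL expression. parse_one, exp.Create and exp.Properties are
    # existing sqlglot APIs; the exact property class produced (e.g. EngineProperty
    # for MySQL's ENGINE option) depends on the dialect.
    #
    #     from sqlglot import exp, parse_one
    #
    #     create = parse_one("CREATE TABLE t (c INT) ENGINE=InnoDB", read="mysql")
    #     assert isinstance(create, exp.Create)
    #     props = create.args.get("properties")  # exp.Properties
    #     names = [type(p).__name__ for p in props.expressions]  # e.g. ["EngineProperty"]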
def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2254 return self.expression( 2255 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2256 ) 2257 2258 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2259 if self._match_texts(("NONE", "DEFINER", "INVOKER")): 2260 security_specifier = self._prev.text.upper() 2261 return self.expression(exp.SecurityProperty, this=security_specifier) 2262 return None 2263 2264 def _parse_settings_property(self) -> exp.SettingsProperty: 2265 return self.expression( 2266 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2267 ) 2268 2269 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2270 if self._index >= 2: 2271 pre_volatile_token = self._tokens[self._index - 2] 2272 else: 2273 pre_volatile_token = None 2274 2275 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2276 return exp.VolatileProperty() 2277 2278 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2279 2280 def _parse_retention_period(self) -> exp.Var: 2281 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2282 number = self._parse_number() 2283 number_str = f"{number} " if number else "" 2284 unit = self._parse_var(any_token=True) 2285 return exp.var(f"{number_str}{unit}") 2286 2287 def _parse_system_versioning_property( 2288 self, with_: bool = False 2289 ) -> exp.WithSystemVersioningProperty: 2290 self._match(TokenType.EQ) 2291 prop = self.expression( 2292 exp.WithSystemVersioningProperty, 2293 **{ # type: ignore 2294 "on": True, 2295 "with": with_, 2296 }, 2297 ) 2298 2299 if self._match_text_seq("OFF"): 2300 prop.set("on", False) 2301 return prop 2302 2303 self._match(TokenType.ON) 2304 if self._match(TokenType.L_PAREN): 2305 while self._curr and not self._match(TokenType.R_PAREN): 2306 if self._match_text_seq("HISTORY_TABLE", "="): 2307 prop.set("this", self._parse_table_parts()) 2308 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2309 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2310 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2311 prop.set("retention_period", self._parse_retention_period()) 2312 2313 self._match(TokenType.COMMA) 2314 2315 return prop 2316 2317 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2318 self._match(TokenType.EQ) 2319 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2320 prop = self.expression(exp.DataDeletionProperty, on=on) 2321 2322 if self._match(TokenType.L_PAREN): 2323 while self._curr and not self._match(TokenType.R_PAREN): 2324 if self._match_text_seq("FILTER_COLUMN", "="): 2325 prop.set("filter_column", self._parse_column()) 2326 elif self._match_text_seq("RETENTION_PERIOD", "="): 2327 prop.set("retention_period", self._parse_retention_period()) 2328 2329 self._match(TokenType.COMMA) 2330 2331 return prop 2332 2333 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2334 kind = "HASH" 2335 expressions: t.Optional[t.List[exp.Expression]] = None 2336 if self._match_text_seq("BY", "HASH"): 2337 expressions = self._parse_wrapped_csv(self._parse_id_var) 2338 elif self._match_text_seq("BY", "RANDOM"): 2339 kind = "RANDOM" 2340 2341 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2342 buckets: t.Optional[exp.Expression] = None 2343 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2344 
buckets = self._parse_number() 2345 2346 return self.expression( 2347 exp.DistributedByProperty, 2348 expressions=expressions, 2349 kind=kind, 2350 buckets=buckets, 2351 order=self._parse_order(), 2352 ) 2353 2354 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2355 self._match_text_seq("KEY") 2356 expressions = self._parse_wrapped_id_vars() 2357 return self.expression(expr_type, expressions=expressions) 2358 2359 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2360 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2361 prop = self._parse_system_versioning_property(with_=True) 2362 self._match_r_paren() 2363 return prop 2364 2365 if self._match(TokenType.L_PAREN, advance=False): 2366 return self._parse_wrapped_properties() 2367 2368 if self._match_text_seq("JOURNAL"): 2369 return self._parse_withjournaltable() 2370 2371 if self._match_texts(self.VIEW_ATTRIBUTES): 2372 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2373 2374 if self._match_text_seq("DATA"): 2375 return self._parse_withdata(no=False) 2376 elif self._match_text_seq("NO", "DATA"): 2377 return self._parse_withdata(no=True) 2378 2379 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2380 return self._parse_serde_properties(with_=True) 2381 2382 if self._match(TokenType.SCHEMA): 2383 return self.expression( 2384 exp.WithSchemaBindingProperty, 2385 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2386 ) 2387 2388 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2389 return self.expression( 2390 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2391 ) 2392 2393 if not self._next: 2394 return None 2395 2396 return self._parse_withisolatedloading() 2397 2398 def _parse_procedure_option(self) -> exp.Expression | None: 2399 if self._match_text_seq("EXECUTE", "AS"): 2400 return self.expression( 2401 exp.ExecuteAsProperty, 2402 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2403 or self._parse_string(), 2404 ) 2405 2406 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2407 2408 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2409 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2410 self._match(TokenType.EQ) 2411 2412 user = self._parse_id_var() 2413 self._match(TokenType.PARAMETER) 2414 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2415 2416 if not user or not host: 2417 return None 2418 2419 return exp.DefinerProperty(this=f"{user}@{host}") 2420 2421 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2422 self._match(TokenType.TABLE) 2423 self._match(TokenType.EQ) 2424 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2425 2426 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2427 return self.expression(exp.LogProperty, no=no) 2428 2429 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2430 return self.expression(exp.JournalProperty, **kwargs) 2431 2432 def _parse_checksum(self) -> exp.ChecksumProperty: 2433 self._match(TokenType.EQ) 2434 2435 on = None 2436 if self._match(TokenType.ON): 2437 on = True 2438 elif self._match_text_seq("OFF"): 2439 on = False 2440 2441 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2442 2443 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2444 return self.expression( 2445 exp.Cluster, 2446 expressions=( 2447 
self._parse_wrapped_csv(self._parse_ordered) 2448 if wrapped 2449 else self._parse_csv(self._parse_ordered) 2450 ), 2451 ) 2452 2453 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2454 self._match_text_seq("BY") 2455 2456 self._match_l_paren() 2457 expressions = self._parse_csv(self._parse_column) 2458 self._match_r_paren() 2459 2460 if self._match_text_seq("SORTED", "BY"): 2461 self._match_l_paren() 2462 sorted_by = self._parse_csv(self._parse_ordered) 2463 self._match_r_paren() 2464 else: 2465 sorted_by = None 2466 2467 self._match(TokenType.INTO) 2468 buckets = self._parse_number() 2469 self._match_text_seq("BUCKETS") 2470 2471 return self.expression( 2472 exp.ClusteredByProperty, 2473 expressions=expressions, 2474 sorted_by=sorted_by, 2475 buckets=buckets, 2476 ) 2477 2478 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2479 if not self._match_text_seq("GRANTS"): 2480 self._retreat(self._index - 1) 2481 return None 2482 2483 return self.expression(exp.CopyGrantsProperty) 2484 2485 def _parse_freespace(self) -> exp.FreespaceProperty: 2486 self._match(TokenType.EQ) 2487 return self.expression( 2488 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2489 ) 2490 2491 def _parse_mergeblockratio( 2492 self, no: bool = False, default: bool = False 2493 ) -> exp.MergeBlockRatioProperty: 2494 if self._match(TokenType.EQ): 2495 return self.expression( 2496 exp.MergeBlockRatioProperty, 2497 this=self._parse_number(), 2498 percent=self._match(TokenType.PERCENT), 2499 ) 2500 2501 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2502 2503 def _parse_datablocksize( 2504 self, 2505 default: t.Optional[bool] = None, 2506 minimum: t.Optional[bool] = None, 2507 maximum: t.Optional[bool] = None, 2508 ) -> exp.DataBlocksizeProperty: 2509 self._match(TokenType.EQ) 2510 size = self._parse_number() 2511 2512 units = None 2513 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2514 units = self._prev.text 2515 2516 return self.expression( 2517 exp.DataBlocksizeProperty, 2518 size=size, 2519 units=units, 2520 default=default, 2521 minimum=minimum, 2522 maximum=maximum, 2523 ) 2524 2525 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2526 self._match(TokenType.EQ) 2527 always = self._match_text_seq("ALWAYS") 2528 manual = self._match_text_seq("MANUAL") 2529 never = self._match_text_seq("NEVER") 2530 default = self._match_text_seq("DEFAULT") 2531 2532 autotemp = None 2533 if self._match_text_seq("AUTOTEMP"): 2534 autotemp = self._parse_schema() 2535 2536 return self.expression( 2537 exp.BlockCompressionProperty, 2538 always=always, 2539 manual=manual, 2540 never=never, 2541 default=default, 2542 autotemp=autotemp, 2543 ) 2544 2545 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2546 index = self._index 2547 no = self._match_text_seq("NO") 2548 concurrent = self._match_text_seq("CONCURRENT") 2549 2550 if not self._match_text_seq("ISOLATED", "LOADING"): 2551 self._retreat(index) 2552 return None 2553 2554 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2555 return self.expression( 2556 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2557 ) 2558 2559 def _parse_locking(self) -> exp.LockingProperty: 2560 if self._match(TokenType.TABLE): 2561 kind = "TABLE" 2562 elif self._match(TokenType.VIEW): 2563 kind = "VIEW" 2564 elif self._match(TokenType.ROW): 2565 kind = "ROW" 2566 elif 
self._match_text_seq("DATABASE"): 2567 kind = "DATABASE" 2568 else: 2569 kind = None 2570 2571 if kind in ("DATABASE", "TABLE", "VIEW"): 2572 this = self._parse_table_parts() 2573 else: 2574 this = None 2575 2576 if self._match(TokenType.FOR): 2577 for_or_in = "FOR" 2578 elif self._match(TokenType.IN): 2579 for_or_in = "IN" 2580 else: 2581 for_or_in = None 2582 2583 if self._match_text_seq("ACCESS"): 2584 lock_type = "ACCESS" 2585 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2586 lock_type = "EXCLUSIVE" 2587 elif self._match_text_seq("SHARE"): 2588 lock_type = "SHARE" 2589 elif self._match_text_seq("READ"): 2590 lock_type = "READ" 2591 elif self._match_text_seq("WRITE"): 2592 lock_type = "WRITE" 2593 elif self._match_text_seq("CHECKSUM"): 2594 lock_type = "CHECKSUM" 2595 else: 2596 lock_type = None 2597 2598 override = self._match_text_seq("OVERRIDE") 2599 2600 return self.expression( 2601 exp.LockingProperty, 2602 this=this, 2603 kind=kind, 2604 for_or_in=for_or_in, 2605 lock_type=lock_type, 2606 override=override, 2607 ) 2608 2609 def _parse_partition_by(self) -> t.List[exp.Expression]: 2610 if self._match(TokenType.PARTITION_BY): 2611 return self._parse_csv(self._parse_assignment) 2612 return [] 2613 2614 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2615 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2616 if self._match_text_seq("MINVALUE"): 2617 return exp.var("MINVALUE") 2618 if self._match_text_seq("MAXVALUE"): 2619 return exp.var("MAXVALUE") 2620 return self._parse_bitwise() 2621 2622 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2623 expression = None 2624 from_expressions = None 2625 to_expressions = None 2626 2627 if self._match(TokenType.IN): 2628 this = self._parse_wrapped_csv(self._parse_bitwise) 2629 elif self._match(TokenType.FROM): 2630 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2631 self._match_text_seq("TO") 2632 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2633 elif self._match_text_seq("WITH", "(", "MODULUS"): 2634 this = self._parse_number() 2635 self._match_text_seq(",", "REMAINDER") 2636 expression = self._parse_number() 2637 self._match_r_paren() 2638 else: 2639 self.raise_error("Failed to parse partition bound spec.") 2640 2641 return self.expression( 2642 exp.PartitionBoundSpec, 2643 this=this, 2644 expression=expression, 2645 from_expressions=from_expressions, 2646 to_expressions=to_expressions, 2647 ) 2648 2649 # https://www.postgresql.org/docs/current/sql-createtable.html 2650 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2651 if not self._match_text_seq("OF"): 2652 self._retreat(self._index - 1) 2653 return None 2654 2655 this = self._parse_table(schema=True) 2656 2657 if self._match(TokenType.DEFAULT): 2658 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2659 elif self._match_text_seq("FOR", "VALUES"): 2660 expression = self._parse_partition_bound_spec() 2661 else: 2662 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2663 2664 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2665 2666 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2667 self._match(TokenType.EQ) 2668 return self.expression( 2669 exp.PartitionedByProperty, 2670 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2671 ) 2672 2673 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2674 if self._match_text_seq("AND", "STATISTICS"): 2675 
statistics = True 2676 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2677 statistics = False 2678 else: 2679 statistics = None 2680 2681 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2682 2683 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2684 if self._match_text_seq("SQL"): 2685 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2686 return None 2687 2688 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2689 if self._match_text_seq("SQL", "DATA"): 2690 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2691 return None 2692 2693 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2694 if self._match_text_seq("PRIMARY", "INDEX"): 2695 return exp.NoPrimaryIndexProperty() 2696 if self._match_text_seq("SQL"): 2697 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2698 return None 2699 2700 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2701 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2702 return exp.OnCommitProperty() 2703 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2704 return exp.OnCommitProperty(delete=True) 2705 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2706 2707 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2708 if self._match_text_seq("SQL", "DATA"): 2709 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2710 return None 2711 2712 def _parse_distkey(self) -> exp.DistKeyProperty: 2713 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2714 2715 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2716 table = self._parse_table(schema=True) 2717 2718 options = [] 2719 while self._match_texts(("INCLUDING", "EXCLUDING")): 2720 this = self._prev.text.upper() 2721 2722 id_var = self._parse_id_var() 2723 if not id_var: 2724 return None 2725 2726 options.append( 2727 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2728 ) 2729 2730 return self.expression(exp.LikeProperty, this=table, expressions=options) 2731 2732 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2733 return self.expression( 2734 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2735 ) 2736 2737 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2738 self._match(TokenType.EQ) 2739 return self.expression( 2740 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2741 ) 2742 2743 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2744 self._match_text_seq("WITH", "CONNECTION") 2745 return self.expression( 2746 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2747 ) 2748 2749 def _parse_returns(self) -> exp.ReturnsProperty: 2750 value: t.Optional[exp.Expression] 2751 null = None 2752 is_table = self._match(TokenType.TABLE) 2753 2754 if is_table: 2755 if self._match(TokenType.LT): 2756 value = self.expression( 2757 exp.Schema, 2758 this="TABLE", 2759 expressions=self._parse_csv(self._parse_struct_types), 2760 ) 2761 if not self._match(TokenType.GT): 2762 self.raise_error("Expecting >") 2763 else: 2764 value = self._parse_schema(exp.var("TABLE")) 2765 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2766 null = True 2767 value = None 2768 else: 2769 value = self._parse_types() 2770 2771 return 
self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2772 2773 def _parse_describe(self) -> exp.Describe: 2774 kind = self._match_set(self.CREATABLES) and self._prev.text 2775 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2776 if self._match(TokenType.DOT): 2777 style = None 2778 self._retreat(self._index - 2) 2779 2780 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2781 2782 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2783 this = self._parse_statement() 2784 else: 2785 this = self._parse_table(schema=True) 2786 2787 properties = self._parse_properties() 2788 expressions = properties.expressions if properties else None 2789 partition = self._parse_partition() 2790 return self.expression( 2791 exp.Describe, 2792 this=this, 2793 style=style, 2794 kind=kind, 2795 expressions=expressions, 2796 partition=partition, 2797 format=format, 2798 ) 2799 2800 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2801 kind = self._prev.text.upper() 2802 expressions = [] 2803 2804 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2805 if self._match(TokenType.WHEN): 2806 expression = self._parse_disjunction() 2807 self._match(TokenType.THEN) 2808 else: 2809 expression = None 2810 2811 else_ = self._match(TokenType.ELSE) 2812 2813 if not self._match(TokenType.INTO): 2814 return None 2815 2816 return self.expression( 2817 exp.ConditionalInsert, 2818 this=self.expression( 2819 exp.Insert, 2820 this=self._parse_table(schema=True), 2821 expression=self._parse_derived_table_values(), 2822 ), 2823 expression=expression, 2824 else_=else_, 2825 ) 2826 2827 expression = parse_conditional_insert() 2828 while expression is not None: 2829 expressions.append(expression) 2830 expression = parse_conditional_insert() 2831 2832 return self.expression( 2833 exp.MultitableInserts, 2834 kind=kind, 2835 comments=comments, 2836 expressions=expressions, 2837 source=self._parse_table(), 2838 ) 2839 2840 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2841 comments = [] 2842 hint = self._parse_hint() 2843 overwrite = self._match(TokenType.OVERWRITE) 2844 ignore = self._match(TokenType.IGNORE) 2845 local = self._match_text_seq("LOCAL") 2846 alternative = None 2847 is_function = None 2848 2849 if self._match_text_seq("DIRECTORY"): 2850 this: t.Optional[exp.Expression] = self.expression( 2851 exp.Directory, 2852 this=self._parse_var_or_string(), 2853 local=local, 2854 row_format=self._parse_row_format(match_row=True), 2855 ) 2856 else: 2857 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2858 comments += ensure_list(self._prev_comments) 2859 return self._parse_multitable_inserts(comments) 2860 2861 if self._match(TokenType.OR): 2862 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2863 2864 self._match(TokenType.INTO) 2865 comments += ensure_list(self._prev_comments) 2866 self._match(TokenType.TABLE) 2867 is_function = self._match(TokenType.FUNCTION) 2868 2869 this = ( 2870 self._parse_table(schema=True, parse_partition=True) 2871 if not is_function 2872 else self._parse_function() 2873 ) 2874 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2875 this.set("alias", self._parse_table_alias()) 2876 2877 returning = self._parse_returning() 2878 2879 return self.expression( 2880 exp.Insert, 2881 comments=comments, 2882 hint=hint, 2883 is_function=is_function, 2884 this=this, 
2885 stored=self._match_text_seq("STORED") and self._parse_stored(), 2886 by_name=self._match_text_seq("BY", "NAME"), 2887 exists=self._parse_exists(), 2888 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2889 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2890 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2891 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2892 conflict=self._parse_on_conflict(), 2893 returning=returning or self._parse_returning(), 2894 overwrite=overwrite, 2895 alternative=alternative, 2896 ignore=ignore, 2897 source=self._match(TokenType.TABLE) and self._parse_table(), 2898 ) 2899 2900 def _parse_kill(self) -> exp.Kill: 2901 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2902 2903 return self.expression( 2904 exp.Kill, 2905 this=self._parse_primary(), 2906 kind=kind, 2907 ) 2908 2909 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2910 conflict = self._match_text_seq("ON", "CONFLICT") 2911 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2912 2913 if not conflict and not duplicate: 2914 return None 2915 2916 conflict_keys = None 2917 constraint = None 2918 2919 if conflict: 2920 if self._match_text_seq("ON", "CONSTRAINT"): 2921 constraint = self._parse_id_var() 2922 elif self._match(TokenType.L_PAREN): 2923 conflict_keys = self._parse_csv(self._parse_id_var) 2924 self._match_r_paren() 2925 2926 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2927 if self._prev.token_type == TokenType.UPDATE: 2928 self._match(TokenType.SET) 2929 expressions = self._parse_csv(self._parse_equality) 2930 else: 2931 expressions = None 2932 2933 return self.expression( 2934 exp.OnConflict, 2935 duplicate=duplicate, 2936 expressions=expressions, 2937 action=action, 2938 conflict_keys=conflict_keys, 2939 constraint=constraint, 2940 where=self._parse_where(), 2941 ) 2942 2943 def _parse_returning(self) -> t.Optional[exp.Returning]: 2944 if not self._match(TokenType.RETURNING): 2945 return None 2946 return self.expression( 2947 exp.Returning, 2948 expressions=self._parse_csv(self._parse_expression), 2949 into=self._match(TokenType.INTO) and self._parse_table_part(), 2950 ) 2951 2952 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2953 if not self._match(TokenType.FORMAT): 2954 return None 2955 return self._parse_row_format() 2956 2957 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2958 index = self._index 2959 with_ = with_ or self._match_text_seq("WITH") 2960 2961 if not self._match(TokenType.SERDE_PROPERTIES): 2962 self._retreat(index) 2963 return None 2964 return self.expression( 2965 exp.SerdeProperties, 2966 **{ # type: ignore 2967 "expressions": self._parse_wrapped_properties(), 2968 "with": with_, 2969 }, 2970 ) 2971 2972 def _parse_row_format( 2973 self, match_row: bool = False 2974 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2975 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2976 return None 2977 2978 if self._match_text_seq("SERDE"): 2979 this = self._parse_string() 2980 2981 serde_properties = self._parse_serde_properties() 2982 2983 return self.expression( 2984 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2985 ) 2986 2987 self._match_text_seq("DELIMITED") 2988 2989 kwargs = {} 2990 2991 if 
self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2992 kwargs["fields"] = self._parse_string() 2993 if self._match_text_seq("ESCAPED", "BY"): 2994 kwargs["escaped"] = self._parse_string() 2995 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2996 kwargs["collection_items"] = self._parse_string() 2997 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2998 kwargs["map_keys"] = self._parse_string() 2999 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3000 kwargs["lines"] = self._parse_string() 3001 if self._match_text_seq("NULL", "DEFINED", "AS"): 3002 kwargs["null"] = self._parse_string() 3003 3004 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 3005 3006 def _parse_load(self) -> exp.LoadData | exp.Command: 3007 if self._match_text_seq("DATA"): 3008 local = self._match_text_seq("LOCAL") 3009 self._match_text_seq("INPATH") 3010 inpath = self._parse_string() 3011 overwrite = self._match(TokenType.OVERWRITE) 3012 self._match_pair(TokenType.INTO, TokenType.TABLE) 3013 3014 return self.expression( 3015 exp.LoadData, 3016 this=self._parse_table(schema=True), 3017 local=local, 3018 overwrite=overwrite, 3019 inpath=inpath, 3020 partition=self._parse_partition(), 3021 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3022 serde=self._match_text_seq("SERDE") and self._parse_string(), 3023 ) 3024 return self._parse_as_command(self._prev) 3025 3026 def _parse_delete(self) -> exp.Delete: 3027 # This handles MySQL's "Multiple-Table Syntax" 3028 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3029 tables = None 3030 if not self._match(TokenType.FROM, advance=False): 3031 tables = self._parse_csv(self._parse_table) or None 3032 3033 returning = self._parse_returning() 3034 3035 return self.expression( 3036 exp.Delete, 3037 tables=tables, 3038 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3039 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3040 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3041 where=self._parse_where(), 3042 returning=returning or self._parse_returning(), 3043 limit=self._parse_limit(), 3044 ) 3045 3046 def _parse_update(self) -> exp.Update: 3047 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 3048 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 3049 returning = self._parse_returning() 3050 return self.expression( 3051 exp.Update, 3052 **{ # type: ignore 3053 "this": this, 3054 "expressions": expressions, 3055 "from": self._parse_from(joins=True), 3056 "where": self._parse_where(), 3057 "returning": returning or self._parse_returning(), 3058 "order": self._parse_order(), 3059 "limit": self._parse_limit(), 3060 }, 3061 ) 3062 3063 def _parse_use(self) -> exp.Use: 3064 return self.expression( 3065 exp.Use, 3066 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3067 this=self._parse_table(schema=False), 3068 ) 3069 3070 def _parse_uncache(self) -> exp.Uncache: 3071 if not self._match(TokenType.TABLE): 3072 self.raise_error("Expecting TABLE after UNCACHE") 3073 3074 return self.expression( 3075 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3076 ) 3077 3078 def _parse_cache(self) -> exp.Cache: 3079 lazy = self._match_text_seq("LAZY") 3080 self._match(TokenType.TABLE) 3081 table = self._parse_table(schema=True) 3082 3083 options = [] 3084 if self._match_text_seq("OPTIONS"): 3085 self._match_l_paren() 3086 k = 
self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match_texts(self.PARTITION_KEYWORDS):
            return None

        return self.expression(
            exp.Partition,
            subpartition=self._prev.text.upper() == "SUBPARTITION",
            expressions=self._parse_wrapped_csv(self._parse_assignment),
        )

    def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]:
        def _parse_value_expression() -> t.Optional[exp.Expression]:
            if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT):
                return exp.var(self._prev.text.upper())
            return self._parse_expression()

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(_parse_value_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        expression = self._parse_expression()
        if expression:
            return self.expression(exp.Tuple, expressions=[expression])
        return None

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()

    def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]:
        if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)):
            this: t.Optional[exp.Expression] = self._parse_simplified_pivot(
                is_unpivot=self._prev.token_type == TokenType.UNPIVOT
            )
        elif self._match(TokenType.FROM):
            from_ = self._parse_from(skip_from_token=True)
            # Support parentheses for duckdb FROM-first syntax
            select = self._parse_select()
            if select:
                select.set("from", from_)
                this = select
            else:
                this = exp.select("*").from_(t.cast(exp.From, from_))
        else:
            this = (
                self._parse_table()
                if table
                else self._parse_select(nested=True, parse_set_operation=False)
            )

        # Transform exp.Values into an exp.Table to pass through parse_query_modifiers
        # in case a modifier (e.g. a join) follows
        if table and isinstance(this, exp.Values) and this.alias:
            alias = this.args["alias"].pop()
            this = exp.Table(this=this, alias=alias)

        this = self._parse_query_modifiers(self._parse_set_operations(this))

        return this

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()

            if self._next and not self._next.token_type == TokenType.DOT:
                all_ = self._match(TokenType.ALL)
                distinct = self._match_set(self.DISTINCT_TOKENS)
            else:
                all_, distinct = None, None

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value(values=False) if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            operation_modifiers = []
            while self._curr and self._match_texts(self.OPERATION_MODIFIERS):
                operation_modifiers.append(exp.var(self._prev.text.upper()))

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
                operation_modifiers=operation_modifiers or None,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_wrapped_select(table=table)

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            self._match_r_paren()
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            if self._match(TokenType.PIPE_GT, advance=False):
                return self._parse_pipe_syntax_query(
                    exp.Select().from_(from_.this, append=False, copy=False)
                )
            this = exp.select("*").from_(from_.this, copy=False)
        elif self._match(TokenType.SUMMARIZE):
            table = self._match(TokenType.TABLE)
            this = self._parse_select() or self._parse_string() or self._parse_table()
            return self.expression(exp.Summarize, this=this, table=table)
        elif self._match(TokenType.DESCRIBE):
            this = self._parse_describe()
        elif self._match_text_seq("STREAM"):
            this =
self._parse_function() 3267 if this: 3268 this = self.expression(exp.Stream, this=this) 3269 else: 3270 self._retreat(self._index - 1) 3271 else: 3272 this = None 3273 3274 return self._parse_set_operations(this) if parse_set_operation else this 3275 3276 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3277 self._match_text_seq("SEARCH") 3278 3279 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3280 3281 if not kind: 3282 return None 3283 3284 self._match_text_seq("FIRST", "BY") 3285 3286 return self.expression( 3287 exp.RecursiveWithSearch, 3288 kind=kind, 3289 this=self._parse_id_var(), 3290 expression=self._match_text_seq("SET") and self._parse_id_var(), 3291 using=self._match_text_seq("USING") and self._parse_id_var(), 3292 ) 3293 3294 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3295 if not skip_with_token and not self._match(TokenType.WITH): 3296 return None 3297 3298 comments = self._prev_comments 3299 recursive = self._match(TokenType.RECURSIVE) 3300 3301 last_comments = None 3302 expressions = [] 3303 while True: 3304 cte = self._parse_cte() 3305 if isinstance(cte, exp.CTE): 3306 expressions.append(cte) 3307 if last_comments: 3308 cte.add_comments(last_comments) 3309 3310 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3311 break 3312 else: 3313 self._match(TokenType.WITH) 3314 3315 last_comments = self._prev_comments 3316 3317 return self.expression( 3318 exp.With, 3319 comments=comments, 3320 expressions=expressions, 3321 recursive=recursive, 3322 search=self._parse_recursive_with_search(), 3323 ) 3324 3325 def _parse_cte(self) -> t.Optional[exp.CTE]: 3326 index = self._index 3327 3328 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3329 if not alias or not alias.this: 3330 self.raise_error("Expected CTE to have alias") 3331 3332 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3333 self._retreat(index) 3334 return None 3335 3336 comments = self._prev_comments 3337 3338 if self._match_text_seq("NOT", "MATERIALIZED"): 3339 materialized = False 3340 elif self._match_text_seq("MATERIALIZED"): 3341 materialized = True 3342 else: 3343 materialized = None 3344 3345 cte = self.expression( 3346 exp.CTE, 3347 this=self._parse_wrapped(self._parse_statement), 3348 alias=alias, 3349 materialized=materialized, 3350 comments=comments, 3351 ) 3352 3353 if isinstance(cte.this, exp.Values): 3354 cte.set("this", exp.select("*").from_(exp.alias_(cte.this, "_values", table=True))) 3355 3356 return cte 3357 3358 def _parse_table_alias( 3359 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3360 ) -> t.Optional[exp.TableAlias]: 3361 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3362 # so this section tries to parse the clause version and if it fails, it treats the token 3363 # as an identifier (alias) 3364 if self._can_parse_limit_or_offset(): 3365 return None 3366 3367 any_token = self._match(TokenType.ALIAS) 3368 alias = ( 3369 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3370 or self._parse_string_as_identifier() 3371 ) 3372 3373 index = self._index 3374 if self._match(TokenType.L_PAREN): 3375 columns = self._parse_csv(self._parse_function_parameter) 3376 self._match_r_paren() if columns else self._retreat(index) 3377 else: 3378 columns = None 3379 3380 if not alias and not columns: 3381 return None 3382 3383 table_alias = 

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
        # so this section tries to parse the clause version and if it fails, it treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return None

        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)

        # We bubble up comments from the Identifier to the TableAlias
        if isinstance(alias, exp.Identifier):
            table_alias.add_comments(alias.pop_comments())

        return table_alias

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
            sample=self._parse_table_sample(),
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, self.MODIFIABLES):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]:
        start = self._curr
        while self._curr:
            self._advance()

        end = self._tokens[self._index - 1]
        return exp.Hint(expressions=[self._find_sql(start, end)])

    def _parse_hint_function_call(self) -> t.Optional[exp.Expression]:
        return self._parse_function_call()

    def _parse_hint_body(self) -> t.Optional[exp.Hint]:
        start_index = self._index
        should_fallback_to_string = False

        hints = []
        try:
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_hint_function_call() or self._parse_var(upper=True),
                ),
                [],
            ):
                hints.extend(hint)
        except ParseError:
            should_fallback_to_string = True

        if should_fallback_to_string or self._curr:
            self._retreat(start_index)
            return self._parse_hint_fallback_to_string()

        return self.expression(exp.Hint, expressions=hints)

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT) and self._prev_comments:
            return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )
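
    # Editor's note: illustrative usage only. _parse_from/_parse_table_alias accept both bare
    # and column-aliased sources:
    #
    #     import sqlglot
    #     from_ = sqlglot.parse_one("SELECT * FROM my_table AS t(a, b)").args["from"]
    #     # from_.this is an exp.Table whose alias arg is an exp.TableAlias with columns a, b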

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        ordinality: t.Optional[bool] = None

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
            ordinality=ordinality,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_using_identifiers(self) -> t.List[exp.Expression]:
        def _parse_column_as_identifier() -> t.Optional[exp.Expression]:
            this = self._parse_column()
            if isinstance(this, exp.Column):
                return this.this
            return this

        return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            table = self._try_parse(self._parse_table)
            if table:
                return self.expression(exp.Join, this=table)
            return None

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}
        if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA):
            kwargs["expressions"] = self._parse_csv(
                lambda: self._parse_table(parse_bracket=parse_bracket)
            )

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        elif (
            not (outer_apply or cross_apply)
            and not isinstance(kwargs["this"], exp.Unnest)
            and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY))
        ):
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_assignment()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_using_identifiers()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        kwargs["pivots"] = self._parse_pivots()

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)
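
    # Editor's note: illustrative usage only. _parse_join yields exp.Join nodes carrying
    # side/kind/on args:
    #
    #     import sqlglot
    #     join = sqlglot.parse_one("SELECT * FROM a LEFT JOIN b ON a.id = b.id").args["joins"][0]
    #     assert join.side == "LEFT" and join.args["on"] is not None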

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        this = self._parse_assignment()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        on = self._parse_field() if self._match(TokenType.ON) else None

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
            on=on,
        )

    def _parse_index(
        self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
    ) -> t.Optional[exp.Index]:
        if index or anonymous:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_set((TokenType.INDEX, TokenType.KEY))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        table = self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
        )

        changes = self._parse_changes()
        if changes:
            table.set("changes", changes)

        at_before = self._parse_historical_data()
        if at_before:
            table.set("when", at_before)

        pivots = self._parse_pivots()
        if pivots:
            table.set("pivots", pivots)

        return table

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv(
            self._parse_table
        )
        rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or rows_from
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this
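
    # Editor's note: illustrative usage only. _parse_table_parts splits dotted references
    # into catalog/db/table:
    #
    #     import sqlglot
    #     tbl = sqlglot.parse_one("SELECT * FROM c.d.t").args["from"].this
    #     assert (tbl.catalog, tbl.db, tbl.name) == ("c", "d", "t")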

    def _parse_version(self) -> t.Optional[exp.Version]:
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]:
        # https://docs.snowflake.com/en/sql-reference/constructs/at-before
        index = self._index
        historical_data = None
        if self._match_texts(self.HISTORICAL_DATA_PREFIX):
            this = self._prev.text.upper()
            kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.HISTORICAL_DATA_KIND)
                and self._prev.text.upper()
            )
            expression = self._match(TokenType.FARROW) and self._parse_bitwise()

            if expression:
                self._match_r_paren()
                historical_data = self.expression(
                    exp.HistoricalData, this=this, kind=kind, expression=expression
                )
            else:
                self._retreat(index)

        return historical_data

    def _parse_changes(self) -> t.Optional[exp.Changes]:
        if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"):
            return None

        information = self._parse_var(any_token=True)
        self._match_r_paren()

        return self.expression(
            exp.Changes,
            information=information,
            at_before=self._parse_historical_data(),
            end=self._parse_historical_data(),
        )

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not (
            # ClickHouse's `FORMAT Values` is equivalent to `VALUES`
            self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES")
        ):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        if not method and self.DEFAULT_SAMPLING_METHOD:
            method = exp.var(self.DEFAULT_SAMPLING_METHOD)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        return iter(self._parse_join, None)
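
    # Editor's note: illustrative usage only. _parse_unnest handles UNNEST table factors,
    # e.g. with the Trino dialect:
    #
    #     import sqlglot
    #     ast = sqlglot.parse_one("SELECT * FROM UNNEST(ARRAY[1, 2]) AS t(x)", read="trino")
    #     # ast.args["from"].this is an exp.Unnest whose alias carries the column x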

    def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]:
        if not self._match(TokenType.INTO):
            return None

        return self.expression(
            exp.UnpivotColumns,
            this=self._match_text_seq("NAME") and self._parse_column(),
            expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column),
        )

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match(TokenType.IN):
                # PIVOT ... ON col IN (row_val1, row_val2)
                return self._parse_in(this)
            if self._match(TokenType.ALIAS, advance=False):
                # UNPIVOT ... ON (col1, col2, col3) AS row_val
                return self._parse_alias(this)

            return this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        into = self._parse_unpivot_columns()
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()

        return self.expression(
            exp.Pivot,
            this=this,
            expressions=expressions,
            using=using,
            group=group,
            unpivot=is_unpivot,
            into=into,
        )

    def _parse_pivot_in(self) -> exp.In:
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_select_or_expression()

            self._match(TokenType.ALIAS)
            alias = self._parse_bitwise()
            if alias:
                if isinstance(alias, exp.Column) and not alias.db:
                    alias = alias.this
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        if self._match(TokenType.ANY):
            exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order()))
        else:
            exprs = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=exprs)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        fields = []
        while True:
            field = self._try_parse(self._parse_pivot_in)
            if not field:
                break
            fields.append(field)

        default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
            self._parse_bitwise
        )

        group = self._parse_group()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            fields=fields,
            unpivot=unpivot,
            include_nulls=include_nulls,
            default_on_null=default_on_null,
            group=group,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            all_fields = []
            for pivot_field in pivot.fields:
                pivot_field_expressions = pivot_field.expressions

                # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case.
                if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny):
                    continue

                all_fields.append(
                    [
                        fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                        for fld in pivot_field_expressions
                    ]
                )

            if all_fields:
                if names:
                    all_fields.append(names)

                # Generate all possible combinations of the pivot columns
                # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US'))
                # generates the product between [[2000, 2010], ['NL', 'US'], ['total']]
                for fld_parts_tuple in itertools.product(*all_fields):
                    fld_parts = list(fld_parts_tuple)

                    if names and self.PREFIXED_PIVOT_COLUMNS:
                        # Move the "name" to the front of the list
                        fld_parts.insert(0, fld_parts.pop(-1))

                    columns.append(exp.to_identifier("_".join(fld_parts)))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations if agg.alias]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        while True:
            index = self._index

            elements["expressions"].extend(
                self._parse_csv(
                    lambda: None
                    if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False)
                    else self._parse_assignment()
                )
            )

            before_with_index = self._index
            with_prefix = self._match(TokenType.WITH)

            if self._match(TokenType.ROLLUP):
                elements["rollup"].append(
                    self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix)
                )
            elif self._match(TokenType.CUBE):
                elements["cube"].append(
                    self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix)
                )
            elif self._match(TokenType.GROUPING_SETS):
                elements["grouping_sets"].append(
                    self.expression(
                        exp.GroupingSets,
                        expressions=self._parse_wrapped_csv(self._parse_grouping_set),
                    )
                )
            elif self._match_text_seq("TOTALS"):
                elements["totals"] = True  # type: ignore

            if before_with_index <= self._index <= before_with_index + 1:
                self._retreat(before_with_index)
                break

            if index == self._index:
                break

        return self.expression(exp.Group, **elements)  # type: ignore
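
    # Editor's note: illustrative usage only. _parse_pivot derives the output column names
    # from the IN list (Snowflake syntax shown):
    #
    #     import sqlglot
    #     sql = "SELECT * FROM t PIVOT (SUM(v) FOR y IN ('a', 'b'))"
    #     pivot = sqlglot.parse_one(sql, read="snowflake").args["from"].this.args["pivots"][0]
    #     # pivot.args["columns"] holds the identifiers generated for 'a' and 'b'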

    def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E:
        return self.expression(
            kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column)
        )

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_assignment())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_assignment())

    def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]:
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_assignment()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")
        return connect

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_assignment()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        connect = self._parse_connect_with_prior()

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_assignment()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> t.Optional[exp.Expression]:
        this = self._parse_id_var(any_token=True)
        if self._match(TokenType.ALIAS):
            this = self.expression(exp.Alias, alias=this, this=self._parse_assignment())
        return this

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        this = parse_method() if parse_method else self._parse_assignment()
        if not this:
            return None

        if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL:
            this = exp.var("ALL")

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                    "interpolate": self._parse_interpolate(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit_options(self) -> exp.LimitOptions:
        percent = self._match(TokenType.PERCENT)
        rows = self._match_set((TokenType.ROW, TokenType.ROWS))
        self._match_text_seq("ONLY")
        with_ties = self._match_text_seq("WITH", "TIES")
        return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties)

    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()

                limit_options = self._parse_limit_options()
            else:
                limit_options = None
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                limit_options=limit_options,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                limit_options=self._parse_limit_options(),
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _can_parse_limit_or_offset(self) -> bool:
        if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False):
            return False

        index = self._index
        result = bool(
            self._try_parse(self._parse_limit, retreat=True)
            or self._try_parse(self._parse_offset, retreat=True)
        )
        self._retreat(index)
        return result

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)
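
    # Editor's note: illustrative usage only. _parse_ordered records the resolved NULL
    # ordering on each sort key:
    #
    #     import sqlglot
    #     order = sqlglot.parse_one("SELECT x FROM t ORDER BY x DESC NULLS LAST").args["order"]
    #     ordered = order.expressions[0]
    #     assert ordered.args["desc"] and ordered.args["nulls_first"] is False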

    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def parse_set_operation(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        start = self._index
        _, side_token, kind_token = self._parse_join_parts()

        side = side_token.text if side_token else None
        kind = kind_token.text if kind_token else None

        if not self._match_set(self.SET_OPERATIONS):
            self._retreat(start)
            return None

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            operation: t.Type[exp.SetOperation] = exp.Union
        elif token_type == TokenType.EXCEPT:
            operation = exp.Except
        else:
            operation = exp.Intersect

        comments = self._prev.comments

        if self._match(TokenType.DISTINCT):
            distinct: t.Optional[bool] = True
        elif self._match(TokenType.ALL):
            distinct = False
        else:
            distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
            if distinct is None:
                self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

        by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq(
            "STRICT", "CORRESPONDING"
        )
        if self._match_text_seq("CORRESPONDING"):
            by_name = True
            if not side and not kind:
                kind = "INNER"

        on_column_list = None
        if by_name and self._match_texts(("ON", "BY")):
            on_column_list = self._parse_wrapped_csv(self._parse_column)

        expression = self._parse_select(nested=True, parse_set_operation=False)

        return self.expression(
            operation,
            comments=comments,
            this=this,
            distinct=distinct,
            by_name=by_name,
            expression=expression,
            side=side,
            kind=kind,
            on=on_column_list,
        )

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        while this:
            setop = self.parse_set_operation(this)
            if not setop:
                break
            this = setop

        if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP:
            expression = this.expression

            if expression:
                for arg in self.SET_OP_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_assignment())

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        this = self._parse_disjunction()
        if not this and self._next and self._next.token_type in self.ASSIGNMENT:
            # This allows us to parse <non-identifier token> := <expr>
            this = exp.column(
                t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text)
            )

        while self._match_set(self.ASSIGNMENT):
            if isinstance(this, exp.Column) and len(this.parts) == 1:
                this = this.this

            this = self.expression(
                self.ASSIGNMENT[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_assignment(),
            )

        return this

    def _parse_disjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION)

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self._negate_range(this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not this:
            return this

        return self.expression(exp.Not, this=this)

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        if self._match(TokenType.JSON):
            kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper()

            if self._match_text_seq("WITH"):
                _with = True
            elif self._match_text_seq("WITHOUT"):
                _with = False
            else:
                _with = None

            unique = self._match(TokenType.UNIQUE)
            self._match_text_seq("KEYS")
            expression: t.Optional[exp.Expression] = self.expression(
                exp.JSON, **{"this": kind, "with": _with, "unique": unique}
            )
        else:
            expression = self._parse_primary() or self._parse_null()
            if not expression:
                self._retreat(index)
                return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_column())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())
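
    # Editor's note: illustrative usage only. Chained set operations become a left-deep tree
    # of exp.SetOperation nodes:
    #
    #     import sqlglot
    #     ast = sqlglot.parse_one("SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3")
    #     assert isinstance(ast, sqlglot.exp.Union) and isinstance(ast.this, sqlglot.exp.Union)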

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.to_py())
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if parts and unit:
                # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                unit = None
                self._retreat(self._index - 1)

            if len(parts) == 1:
                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var, this=parts[0][1].upper())
        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        interval = self.expression(exp.Interval, this=this, unit=unit)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(
                    exp.Coalesce, this=this, expressions=ensure_list(self._parse_term())
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        this = self._parse_factor()

        while self._match_set(self.TERM):
            klass = self.TERM[self._prev.token_type]
            comments = self._prev_comments
            expression = self._parse_factor()

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Collate):
                expr = this.expression

                # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise
                # fallback to Identifier / Var
                if isinstance(expr, exp.Column) and len(expr.parts) == 1:
                    ident = expr.this
                    if isinstance(ident, exp.Identifier):
                        this.set("expression", ident if ident.quoted else exp.var(ident.name))

        return this

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Div):
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                this = self._parse_column_ops(this)

                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

            self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]:
        type_name = identifier.name

        while self._match(TokenType.DOT):
            type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

        return exp.DataType.build(type_name, udt=True)
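
    # Editor's note: illustrative usage only. _parse_interval canonicalizes intervals into
    # the INTERVAL '<value>' <UNIT> form:
    #
    #     import sqlglot
    #     assert sqlglot.parse_one("INTERVAL '5 days'").sql() == "INTERVAL '5' DAYS"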
self._parse_user_defined_type(identifier) 5154 else: 5155 self._retreat(self._index - 1) 5156 return None 5157 else: 5158 return None 5159 5160 type_token = self._prev.token_type 5161 5162 if type_token == TokenType.PSEUDO_TYPE: 5163 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5164 5165 if type_token == TokenType.OBJECT_IDENTIFIER: 5166 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5167 5168 # https://materialize.com/docs/sql/types/map/ 5169 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5170 key_type = self._parse_types( 5171 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5172 ) 5173 if not self._match(TokenType.FARROW): 5174 self._retreat(index) 5175 return None 5176 5177 value_type = self._parse_types( 5178 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5179 ) 5180 if not self._match(TokenType.R_BRACKET): 5181 self._retreat(index) 5182 return None 5183 5184 return exp.DataType( 5185 this=exp.DataType.Type.MAP, 5186 expressions=[key_type, value_type], 5187 nested=True, 5188 prefix=prefix, 5189 ) 5190 5191 nested = type_token in self.NESTED_TYPE_TOKENS 5192 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5193 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5194 expressions = None 5195 maybe_func = False 5196 5197 if self._match(TokenType.L_PAREN): 5198 if is_struct: 5199 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5200 elif nested: 5201 expressions = self._parse_csv( 5202 lambda: self._parse_types( 5203 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5204 ) 5205 ) 5206 if type_token == TokenType.NULLABLE and len(expressions) == 1: 5207 this = expressions[0] 5208 this.set("nullable", True) 5209 self._match_r_paren() 5210 return this 5211 elif type_token in self.ENUM_TYPE_TOKENS: 5212 expressions = self._parse_csv(self._parse_equality) 5213 elif is_aggregate: 5214 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5215 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5216 ) 5217 if not func_or_ident: 5218 return None 5219 expressions = [func_or_ident] 5220 if self._match(TokenType.COMMA): 5221 expressions.extend( 5222 self._parse_csv( 5223 lambda: self._parse_types( 5224 check_func=check_func, 5225 schema=schema, 5226 allow_identifiers=allow_identifiers, 5227 ) 5228 ) 5229 ) 5230 else: 5231 expressions = self._parse_csv(self._parse_type_size) 5232 5233 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5234 if type_token == TokenType.VECTOR and len(expressions) == 2: 5235 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 5236 5237 if not expressions or not self._match(TokenType.R_PAREN): 5238 self._retreat(index) 5239 return None 5240 5241 maybe_func = True 5242 5243 values: t.Optional[t.List[exp.Expression]] = None 5244 5245 if nested and self._match(TokenType.LT): 5246 if is_struct: 5247 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5248 else: 5249 expressions = self._parse_csv( 5250 lambda: self._parse_types( 5251 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5252 ) 5253 ) 5254 5255 if not self._match(TokenType.GT): 5256 self.raise_error("Expecting >") 5257 5258 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5259 values = self._parse_csv(self._parse_assignment) 5260 if not values and is_struct: 5261 values = None 5262 
self._retreat(self._index - 1) 5263 else: 5264 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5265 5266 if type_token in self.TIMESTAMPS: 5267 if self._match_text_seq("WITH", "TIME", "ZONE"): 5268 maybe_func = False 5269 tz_type = ( 5270 exp.DataType.Type.TIMETZ 5271 if type_token in self.TIMES 5272 else exp.DataType.Type.TIMESTAMPTZ 5273 ) 5274 this = exp.DataType(this=tz_type, expressions=expressions) 5275 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5276 maybe_func = False 5277 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5278 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5279 maybe_func = False 5280 elif type_token == TokenType.INTERVAL: 5281 unit = self._parse_var(upper=True) 5282 if unit: 5283 if self._match_text_seq("TO"): 5284 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5285 5286 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5287 else: 5288 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5289 elif type_token == TokenType.VOID: 5290 this = exp.DataType(this=exp.DataType.Type.NULL) 5291 5292 if maybe_func and check_func: 5293 index2 = self._index 5294 peek = self._parse_string() 5295 5296 if not peek: 5297 self._retreat(index) 5298 return None 5299 5300 self._retreat(index2) 5301 5302 if not this: 5303 if self._match_text_seq("UNSIGNED"): 5304 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5305 if not unsigned_type_token: 5306 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5307 5308 type_token = unsigned_type_token or type_token 5309 5310 this = exp.DataType( 5311 this=exp.DataType.Type[type_token.value], 5312 expressions=expressions, 5313 nested=nested, 5314 prefix=prefix, 5315 ) 5316 5317 # Empty arrays/structs are allowed 5318 if values is not None: 5319 cls = exp.Struct if is_struct else exp.Array 5320 this = exp.cast(cls(expressions=values), this, copy=False) 5321 5322 elif expressions: 5323 this.set("expressions", expressions) 5324 5325 # https://materialize.com/docs/sql/types/list/#type-name 5326 while self._match(TokenType.LIST): 5327 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5328 5329 index = self._index 5330 5331 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5332 matched_array = self._match(TokenType.ARRAY) 5333 5334 while self._curr: 5335 datatype_token = self._prev.token_type 5336 matched_l_bracket = self._match(TokenType.L_BRACKET) 5337 5338 if (not matched_l_bracket and not matched_array) or ( 5339 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5340 ): 5341 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5342 # not to be confused with the fixed size array parsing 5343 break 5344 5345 matched_array = False 5346 values = self._parse_csv(self._parse_assignment) or None 5347 if ( 5348 values 5349 and not schema 5350 and ( 5351 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5352 ) 5353 ): 5354 # Retreating here means that we should not parse the following values as part of the data type, e.g. 
in DuckDB 5355 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5356 self._retreat(index) 5357 break 5358 5359 this = exp.DataType( 5360 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5361 ) 5362 self._match(TokenType.R_BRACKET) 5363 5364 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5365 converter = self.TYPE_CONVERTERS.get(this.this) 5366 if converter: 5367 this = converter(t.cast(exp.DataType, this)) 5368 5369 return this 5370 5371 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5372 index = self._index 5373 5374 if ( 5375 self._curr 5376 and self._next 5377 and self._curr.token_type in self.TYPE_TOKENS 5378 and self._next.token_type in self.TYPE_TOKENS 5379 ): 5380 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5381 # type token. Without this, the list will be parsed as a type and we'll eventually crash 5382 this = self._parse_id_var() 5383 else: 5384 this = ( 5385 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5386 or self._parse_id_var() 5387 ) 5388 5389 self._match(TokenType.COLON) 5390 5391 if ( 5392 type_required 5393 and not isinstance(this, exp.DataType) 5394 and not self._match_set(self.TYPE_TOKENS, advance=False) 5395 ): 5396 self._retreat(index) 5397 return self._parse_types() 5398 5399 return self._parse_column_def(this) 5400 5401 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5402 if not self._match_text_seq("AT", "TIME", "ZONE"): 5403 return this 5404 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5405 5406 def _parse_column(self) -> t.Optional[exp.Expression]: 5407 this = self._parse_column_reference() 5408 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5409 5410 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5411 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5412 5413 return column 5414 5415 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5416 this = self._parse_field() 5417 if ( 5418 not this 5419 and self._match(TokenType.VALUES, advance=False) 5420 and self.VALUES_FOLLOWED_BY_PAREN 5421 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5422 ): 5423 this = self._parse_id_var() 5424 5425 if isinstance(this, exp.Identifier): 5426 # We bubble up comments from the Identifier to the Column 5427 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5428 5429 return this 5430 5431 def _parse_colon_as_variant_extract( 5432 self, this: t.Optional[exp.Expression] 5433 ) -> t.Optional[exp.Expression]: 5434 casts = [] 5435 json_path = [] 5436 escape = None 5437 5438 while self._match(TokenType.COLON): 5439 start_index = self._index 5440 5441 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5442 path = self._parse_column_ops( 5443 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5444 ) 5445 5446 # The cast :: operator has a lower precedence than the extraction operator :, so 5447 # we rearrange the AST appropriately to avoid casting the JSON path 5448 while isinstance(path, exp.Cast): 5449 casts.append(path.to) 5450 path = path.this 5451 5452 if casts: 5453 dcolon_offset = next( 5454 i 5455 for i, t in enumerate(self._tokens[start_index:]) 5456 if t.token_type == TokenType.DCOLON 
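                    # (Descriptive note, added for clarity: the first `::` token after
                    # the path start marks where the raw JSON path text ends, so the
                    # token just before it becomes end_token below.)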
5457 ) 5458 end_token = self._tokens[start_index + dcolon_offset - 1] 5459 else: 5460 end_token = self._prev 5461 5462 if path: 5463 # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as 5464 # it'll roundtrip to a string literal in GET_PATH 5465 if isinstance(path, exp.Identifier) and path.quoted: 5466 escape = True 5467 5468 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5469 5470 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5471 # Databricks transforms it back to the colon/dot notation 5472 if json_path: 5473 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5474 5475 if json_path_expr: 5476 json_path_expr.set("escape", escape) 5477 5478 this = self.expression( 5479 exp.JSONExtract, 5480 this=this, 5481 expression=json_path_expr, 5482 variant_extract=True, 5483 ) 5484 5485 while casts: 5486 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5487 5488 return this 5489 5490 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5491 return self._parse_types() 5492 5493 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5494 this = self._parse_bracket(this) 5495 5496 while self._match_set(self.COLUMN_OPERATORS): 5497 op_token = self._prev.token_type 5498 op = self.COLUMN_OPERATORS.get(op_token) 5499 5500 if op_token in (TokenType.DCOLON, TokenType.DOTCOLON): 5501 field = self._parse_dcolon() 5502 if not field: 5503 self.raise_error("Expected type") 5504 elif op and self._curr: 5505 field = self._parse_column_reference() or self._parse_bracket() 5506 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5507 field = self._parse_column_ops(field) 5508 else: 5509 field = self._parse_field(any_token=True, anonymous_func=True) 5510 5511 # Function calls can be qualified, e.g., x.y.FOO() 5512 # This converts the final AST to a series of Dots leading to the function call 5513 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5514 if isinstance(field, (exp.Func, exp.Window)) and this: 5515 this = this.transform( 5516 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 5517 ) 5518 5519 if op: 5520 this = op(self, this, field) 5521 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5522 this = self.expression( 5523 exp.Column, 5524 comments=this.comments, 5525 this=field, 5526 table=this.this, 5527 db=this.args.get("table"), 5528 catalog=this.args.get("db"), 5529 ) 5530 elif isinstance(field, exp.Window): 5531 # Move the exp.Dot's to the window's function 5532 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5533 field.set("this", window_func) 5534 this = field 5535 else: 5536 this = self.expression(exp.Dot, this=this, expression=field) 5537 5538 if field and field.comments: 5539 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5540 5541 this = self._parse_bracket(this) 5542 5543 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5544 5545 def _parse_primary(self) -> t.Optional[exp.Expression]: 5546 if self._match_set(self.PRIMARY_PARSERS): 5547 token_type = self._prev.token_type 5548 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5549 5550 if token_type == TokenType.STRING: 5551 expressions = [primary] 5552 while self._match(TokenType.STRING): 5553 
expressions.append(exp.Literal.string(self._prev.text)) 5554 5555 if len(expressions) > 1: 5556 return self.expression(exp.Concat, expressions=expressions) 5557 5558 return primary 5559 5560 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5561 return exp.Literal.number(f"0.{self._prev.text}") 5562 5563 if self._match(TokenType.L_PAREN): 5564 comments = self._prev_comments 5565 query = self._parse_select() 5566 5567 if query: 5568 expressions = [query] 5569 else: 5570 expressions = self._parse_expressions() 5571 5572 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5573 5574 if not this and self._match(TokenType.R_PAREN, advance=False): 5575 this = self.expression(exp.Tuple) 5576 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5577 this = self._parse_subquery(this=this, parse_alias=False) 5578 elif isinstance(this, exp.Subquery): 5579 this = self._parse_subquery( 5580 this=self._parse_set_operations(this), parse_alias=False 5581 ) 5582 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5583 this = self.expression(exp.Tuple, expressions=expressions) 5584 else: 5585 this = self.expression(exp.Paren, this=this) 5586 5587 if this: 5588 this.add_comments(comments) 5589 5590 self._match_r_paren(expression=this) 5591 return this 5592 5593 return None 5594 5595 def _parse_field( 5596 self, 5597 any_token: bool = False, 5598 tokens: t.Optional[t.Collection[TokenType]] = None, 5599 anonymous_func: bool = False, 5600 ) -> t.Optional[exp.Expression]: 5601 if anonymous_func: 5602 field = ( 5603 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5604 or self._parse_primary() 5605 ) 5606 else: 5607 field = self._parse_primary() or self._parse_function( 5608 anonymous=anonymous_func, any_token=any_token 5609 ) 5610 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5611 5612 def _parse_function( 5613 self, 5614 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5615 anonymous: bool = False, 5616 optional_parens: bool = True, 5617 any_token: bool = False, 5618 ) -> t.Optional[exp.Expression]: 5619 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5620 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5621 fn_syntax = False 5622 if ( 5623 self._match(TokenType.L_BRACE, advance=False) 5624 and self._next 5625 and self._next.text.upper() == "FN" 5626 ): 5627 self._advance(2) 5628 fn_syntax = True 5629 5630 func = self._parse_function_call( 5631 functions=functions, 5632 anonymous=anonymous, 5633 optional_parens=optional_parens, 5634 any_token=any_token, 5635 ) 5636 5637 if fn_syntax: 5638 self._match(TokenType.R_BRACE) 5639 5640 return func 5641 5642 def _parse_function_call( 5643 self, 5644 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5645 anonymous: bool = False, 5646 optional_parens: bool = True, 5647 any_token: bool = False, 5648 ) -> t.Optional[exp.Expression]: 5649 if not self._curr: 5650 return None 5651 5652 comments = self._curr.comments 5653 token = self._curr 5654 token_type = self._curr.token_type 5655 this = self._curr.text 5656 upper = this.upper() 5657 5658 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5659 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5660 self._advance() 5661 return self._parse_window(parser(self)) 5662 5663 if not self._next or self._next.token_type != TokenType.L_PAREN: 5664 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5665 self._advance() 5666 return 
self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5667 5668 return None 5669 5670 if any_token: 5671 if token_type in self.RESERVED_TOKENS: 5672 return None 5673 elif token_type not in self.FUNC_TOKENS: 5674 return None 5675 5676 self._advance(2) 5677 5678 parser = self.FUNCTION_PARSERS.get(upper) 5679 if parser and not anonymous: 5680 this = parser(self) 5681 else: 5682 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5683 5684 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5685 this = self.expression( 5686 subquery_predicate, comments=comments, this=self._parse_select() 5687 ) 5688 self._match_r_paren() 5689 return this 5690 5691 if functions is None: 5692 functions = self.FUNCTIONS 5693 5694 function = functions.get(upper) 5695 known_function = function and not anonymous 5696 5697 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5698 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5699 5700 post_func_comments = self._curr and self._curr.comments 5701 if known_function and post_func_comments: 5702 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5703 # call we'll construct it as exp.Anonymous, even if it's "known" 5704 if any( 5705 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5706 for comment in post_func_comments 5707 ): 5708 known_function = False 5709 5710 if alias and known_function: 5711 args = self._kv_to_prop_eq(args) 5712 5713 if known_function: 5714 func_builder = t.cast(t.Callable, function) 5715 5716 if "dialect" in func_builder.__code__.co_varnames: 5717 func = func_builder(args, dialect=self.dialect) 5718 else: 5719 func = func_builder(args) 5720 5721 func = self.validate_expression(func, args) 5722 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5723 func.meta["name"] = this 5724 5725 this = func 5726 else: 5727 if token_type == TokenType.IDENTIFIER: 5728 this = exp.Identifier(this=this, quoted=True).update_positions(token) 5729 5730 this = self.expression(exp.Anonymous, this=this, expressions=args) 5731 this = this.update_positions(token) 5732 5733 if isinstance(this, exp.Expression): 5734 this.add_comments(comments) 5735 5736 self._match_r_paren(this) 5737 return self._parse_window(this) 5738 5739 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5740 return expression 5741 5742 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5743 transformed = [] 5744 5745 for index, e in enumerate(expressions): 5746 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5747 if isinstance(e, exp.Alias): 5748 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5749 5750 if not isinstance(e, exp.PropertyEQ): 5751 e = self.expression( 5752 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5753 ) 5754 5755 if isinstance(e.this, exp.Column): 5756 e.this.replace(e.this.this) 5757 else: 5758 e = self._to_prop_eq(e, index) 5759 5760 transformed.append(e) 5761 5762 return transformed 5763 5764 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5765 return self._parse_statement() 5766 5767 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5768 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5769 5770 def _parse_user_defined_function( 5771 self, kind: t.Optional[TokenType] = None 5772 ) -> t.Optional[exp.Expression]: 5773 this = self._parse_table_parts(schema=True) 5774 5775 if 
not self._match(TokenType.L_PAREN): 5776 return this 5777 5778 expressions = self._parse_csv(self._parse_function_parameter) 5779 self._match_r_paren() 5780 return self.expression( 5781 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5782 ) 5783 5784 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5785 literal = self._parse_primary() 5786 if literal: 5787 return self.expression(exp.Introducer, this=token.text, expression=literal) 5788 5789 return self._identifier_expression(token) 5790 5791 def _parse_session_parameter(self) -> exp.SessionParameter: 5792 kind = None 5793 this = self._parse_id_var() or self._parse_primary() 5794 5795 if this and self._match(TokenType.DOT): 5796 kind = this.name 5797 this = self._parse_var() or self._parse_primary() 5798 5799 return self.expression(exp.SessionParameter, this=this, kind=kind) 5800 5801 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5802 return self._parse_id_var() 5803 5804 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5805 index = self._index 5806 5807 if self._match(TokenType.L_PAREN): 5808 expressions = t.cast( 5809 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5810 ) 5811 5812 if not self._match(TokenType.R_PAREN): 5813 self._retreat(index) 5814 else: 5815 expressions = [self._parse_lambda_arg()] 5816 5817 if self._match_set(self.LAMBDAS): 5818 return self.LAMBDAS[self._prev.token_type](self, expressions) 5819 5820 self._retreat(index) 5821 5822 this: t.Optional[exp.Expression] 5823 5824 if self._match(TokenType.DISTINCT): 5825 this = self.expression( 5826 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5827 ) 5828 else: 5829 this = self._parse_select_or_expression(alias=alias) 5830 5831 return self._parse_limit( 5832 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5833 ) 5834 5835 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5836 index = self._index 5837 if not self._match(TokenType.L_PAREN): 5838 return this 5839 5840 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 5841 # expr can be of both types 5842 if self._match_set(self.SELECT_START_TOKENS): 5843 self._retreat(index) 5844 return this 5845 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5846 self._match_r_paren() 5847 return self.expression(exp.Schema, this=this, expressions=args) 5848 5849 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5850 return self._parse_column_def(self._parse_field(any_token=True)) 5851 5852 def _parse_column_def( 5853 self, this: t.Optional[exp.Expression], computed_column: bool = True 5854 ) -> t.Optional[exp.Expression]: 5855 # column defs are not really columns, they're identifiers 5856 if isinstance(this, exp.Column): 5857 this = this.this 5858 5859 if not computed_column: 5860 self._match(TokenType.ALIAS) 5861 5862 kind = self._parse_types(schema=True) 5863 5864 if self._match_text_seq("FOR", "ORDINALITY"): 5865 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5866 5867 constraints: t.List[exp.Expression] = [] 5868 5869 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5870 ("ALIAS", "MATERIALIZED") 5871 ): 5872 persisted = self._prev.text.upper() == "MATERIALIZED" 5873 constraint_kind = exp.ComputedColumnConstraint( 5874 this=self._parse_assignment(), 5875 persisted=persisted or self._match_text_seq("PERSISTED"), 5876 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5877 ) 5878 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5879 elif ( 5880 kind 5881 and self._match(TokenType.ALIAS, advance=False) 5882 and ( 5883 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 5884 or (self._next and self._next.token_type == TokenType.L_PAREN) 5885 ) 5886 ): 5887 self._advance() 5888 constraints.append( 5889 self.expression( 5890 exp.ColumnConstraint, 5891 kind=exp.ComputedColumnConstraint( 5892 this=self._parse_disjunction(), 5893 persisted=self._match_texts(("STORED", "VIRTUAL")) 5894 and self._prev.text.upper() == "STORED", 5895 ), 5896 ) 5897 ) 5898 5899 while True: 5900 constraint = self._parse_column_constraint() 5901 if not constraint: 5902 break 5903 constraints.append(constraint) 5904 5905 if not kind and not constraints: 5906 return this 5907 5908 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5909 5910 def _parse_auto_increment( 5911 self, 5912 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5913 start = None 5914 increment = None 5915 5916 if self._match(TokenType.L_PAREN, advance=False): 5917 args = self._parse_wrapped_csv(self._parse_bitwise) 5918 start = seq_get(args, 0) 5919 increment = seq_get(args, 1) 5920 elif self._match_text_seq("START"): 5921 start = self._parse_bitwise() 5922 self._match_text_seq("INCREMENT") 5923 increment = self._parse_bitwise() 5924 5925 if start and increment: 5926 return exp.GeneratedAsIdentityColumnConstraint( 5927 start=start, increment=increment, this=False 5928 ) 5929 5930 return exp.AutoIncrementColumnConstraint() 5931 5932 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5933 if not self._match_text_seq("REFRESH"): 5934 self._retreat(self._index - 1) 5935 return None 5936 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5937 5938 def _parse_compress(self) -> exp.CompressColumnConstraint: 5939 if self._match(TokenType.L_PAREN, advance=False): 5940 return self.expression( 5941 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 
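            # (Descriptive note, an assumption not stated in the source: this
            # wrapped-list branch covers multi-value column compression, e.g.
            # Teradata-style COMPRESS ('a', 'b').)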
5942 ) 5943 5944 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5945 5946 def _parse_generated_as_identity( 5947 self, 5948 ) -> ( 5949 exp.GeneratedAsIdentityColumnConstraint 5950 | exp.ComputedColumnConstraint 5951 | exp.GeneratedAsRowColumnConstraint 5952 ): 5953 if self._match_text_seq("BY", "DEFAULT"): 5954 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5955 this = self.expression( 5956 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5957 ) 5958 else: 5959 self._match_text_seq("ALWAYS") 5960 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5961 5962 self._match(TokenType.ALIAS) 5963 5964 if self._match_text_seq("ROW"): 5965 start = self._match_text_seq("START") 5966 if not start: 5967 self._match(TokenType.END) 5968 hidden = self._match_text_seq("HIDDEN") 5969 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5970 5971 identity = self._match_text_seq("IDENTITY") 5972 5973 if self._match(TokenType.L_PAREN): 5974 if self._match(TokenType.START_WITH): 5975 this.set("start", self._parse_bitwise()) 5976 if self._match_text_seq("INCREMENT", "BY"): 5977 this.set("increment", self._parse_bitwise()) 5978 if self._match_text_seq("MINVALUE"): 5979 this.set("minvalue", self._parse_bitwise()) 5980 if self._match_text_seq("MAXVALUE"): 5981 this.set("maxvalue", self._parse_bitwise()) 5982 5983 if self._match_text_seq("CYCLE"): 5984 this.set("cycle", True) 5985 elif self._match_text_seq("NO", "CYCLE"): 5986 this.set("cycle", False) 5987 5988 if not identity: 5989 this.set("expression", self._parse_range()) 5990 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5991 args = self._parse_csv(self._parse_bitwise) 5992 this.set("start", seq_get(args, 0)) 5993 this.set("increment", seq_get(args, 1)) 5994 5995 self._match_r_paren() 5996 5997 return this 5998 5999 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 6000 self._match_text_seq("LENGTH") 6001 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 6002 6003 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 6004 if self._match_text_seq("NULL"): 6005 return self.expression(exp.NotNullColumnConstraint) 6006 if self._match_text_seq("CASESPECIFIC"): 6007 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 6008 if self._match_text_seq("FOR", "REPLICATION"): 6009 return self.expression(exp.NotForReplicationColumnConstraint) 6010 6011 # Unconsume the `NOT` token 6012 self._retreat(self._index - 1) 6013 return None 6014 6015 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 6016 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 6017 6018 procedure_option_follows = ( 6019 self._match(TokenType.WITH, advance=False) 6020 and self._next 6021 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6022 ) 6023 6024 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6025 return self.expression( 6026 exp.ColumnConstraint, 6027 this=this, 6028 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 6029 ) 6030 6031 return this 6032 6033 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6034 if not self._match(TokenType.CONSTRAINT): 6035 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6036 6037 return self.expression( 6038 exp.Constraint, 6039 this=self._parse_id_var(), 6040 expressions=self._parse_unnamed_constraints(), 6041 ) 6042 
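    # Hedged usage sketch (illustrative, not part of the source): named constraints
    # parsed above and unnamed ones parsed below compose, e.g.
    #
    #   import sqlglot
    #   sqlglot.parse_one("CREATE TABLE t (x INT, CONSTRAINT pk PRIMARY KEY (x))")
    #
    # is expected to yield an exp.Constraint named `pk` wrapping the exp.PrimaryKey
    # produced by the matching CONSTRAINT_PARSERS entry.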
6043 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6044 constraints = [] 6045 while True: 6046 constraint = self._parse_unnamed_constraint() or self._parse_function() 6047 if not constraint: 6048 break 6049 constraints.append(constraint) 6050 6051 return constraints 6052 6053 def _parse_unnamed_constraint( 6054 self, constraints: t.Optional[t.Collection[str]] = None 6055 ) -> t.Optional[exp.Expression]: 6056 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6057 constraints or self.CONSTRAINT_PARSERS 6058 ): 6059 return None 6060 6061 constraint = self._prev.text.upper() 6062 if constraint not in self.CONSTRAINT_PARSERS: 6063 self.raise_error(f"No parser found for schema constraint {constraint}.") 6064 6065 return self.CONSTRAINT_PARSERS[constraint](self) 6066 6067 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6068 return self._parse_id_var(any_token=False) 6069 6070 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6071 self._match_text_seq("KEY") 6072 return self.expression( 6073 exp.UniqueColumnConstraint, 6074 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6075 this=self._parse_schema(self._parse_unique_key()), 6076 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6077 on_conflict=self._parse_on_conflict(), 6078 options=self._parse_key_constraint_options(), 6079 ) 6080 6081 def _parse_key_constraint_options(self) -> t.List[str]: 6082 options = [] 6083 while True: 6084 if not self._curr: 6085 break 6086 6087 if self._match(TokenType.ON): 6088 action = None 6089 on = self._advance_any() and self._prev.text 6090 6091 if self._match_text_seq("NO", "ACTION"): 6092 action = "NO ACTION" 6093 elif self._match_text_seq("CASCADE"): 6094 action = "CASCADE" 6095 elif self._match_text_seq("RESTRICT"): 6096 action = "RESTRICT" 6097 elif self._match_pair(TokenType.SET, TokenType.NULL): 6098 action = "SET NULL" 6099 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6100 action = "SET DEFAULT" 6101 else: 6102 self.raise_error("Invalid key constraint") 6103 6104 options.append(f"ON {on} {action}") 6105 else: 6106 var = self._parse_var_from_options( 6107 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6108 ) 6109 if not var: 6110 break 6111 options.append(var.name) 6112 6113 return options 6114 6115 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6116 if match and not self._match(TokenType.REFERENCES): 6117 return None 6118 6119 expressions = None 6120 this = self._parse_table(schema=True) 6121 options = self._parse_key_constraint_options() 6122 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6123 6124 def _parse_foreign_key(self) -> exp.ForeignKey: 6125 expressions = ( 6126 self._parse_wrapped_id_vars() 6127 if not self._match(TokenType.REFERENCES, advance=False) 6128 else None 6129 ) 6130 reference = self._parse_references() 6131 on_options = {} 6132 6133 while self._match(TokenType.ON): 6134 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6135 self.raise_error("Expected DELETE or UPDATE") 6136 6137 kind = self._prev.text.lower() 6138 6139 if self._match_text_seq("NO", "ACTION"): 6140 action = "NO ACTION" 6141 elif self._match(TokenType.SET): 6142 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 6143 action = "SET " + self._prev.text.upper() 6144 else: 6145 self._advance() 6146 action = self._prev.text.upper() 6147 6148 on_options[kind] = action 6149 6150 return self.expression( 6151 
exp.ForeignKey, 6152 expressions=expressions, 6153 reference=reference, 6154 options=self._parse_key_constraint_options(), 6155 **on_options, # type: ignore 6156 ) 6157 6158 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 6159 return self._parse_ordered() or self._parse_field() 6160 6161 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 6162 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 6163 self._retreat(self._index - 1) 6164 return None 6165 6166 id_vars = self._parse_wrapped_id_vars() 6167 return self.expression( 6168 exp.PeriodForSystemTimeConstraint, 6169 this=seq_get(id_vars, 0), 6170 expression=seq_get(id_vars, 1), 6171 ) 6172 6173 def _parse_primary_key( 6174 self, wrapped_optional: bool = False, in_props: bool = False 6175 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 6176 desc = ( 6177 self._match_set((TokenType.ASC, TokenType.DESC)) 6178 and self._prev.token_type == TokenType.DESC 6179 ) 6180 6181 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 6182 return self.expression( 6183 exp.PrimaryKeyColumnConstraint, 6184 desc=desc, 6185 options=self._parse_key_constraint_options(), 6186 ) 6187 6188 expressions = self._parse_wrapped_csv( 6189 self._parse_primary_key_part, optional=wrapped_optional 6190 ) 6191 options = self._parse_key_constraint_options() 6192 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 6193 6194 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6195 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 6196 6197 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6198 """ 6199 Parses a datetime literal in ODBC format. The literal is parsed into the 6200 corresponding expression type; for example, `{d'yyyy-mm-dd'}` is parsed into a 6201 `Date` node, exactly as `DATE('yyyy-mm-dd')` would be.
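        Illustrative example (hedged): `{d '2024-01-15'}` is expected to become
        exp.Date(this=exp.Literal.string('2024-01-15')), assuming the default
        ODBC_DATETIME_LITERALS mapping of d/t/ts to date/time/timestamp nodes.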
6202 6203 Reference: 6204 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6205 """ 6206 self._match(TokenType.VAR) 6207 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6208 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6209 if not self._match(TokenType.R_BRACE): 6210 self.raise_error("Expected }") 6211 return expression 6212 6213 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6214 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6215 return this 6216 6217 bracket_kind = self._prev.token_type 6218 if ( 6219 bracket_kind == TokenType.L_BRACE 6220 and self._curr 6221 and self._curr.token_type == TokenType.VAR 6222 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6223 ): 6224 return self._parse_odbc_datetime_literal() 6225 6226 expressions = self._parse_csv( 6227 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6228 ) 6229 6230 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6231 self.raise_error("Expected ]") 6232 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6233 self.raise_error("Expected }") 6234 6235 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6236 if bracket_kind == TokenType.L_BRACE: 6237 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 6238 elif not this: 6239 this = build_array_constructor( 6240 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6241 ) 6242 else: 6243 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6244 if constructor_type: 6245 return build_array_constructor( 6246 constructor_type, 6247 args=expressions, 6248 bracket_kind=bracket_kind, 6249 dialect=self.dialect, 6250 ) 6251 6252 expressions = apply_index_offset( 6253 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6254 ) 6255 this = self.expression( 6256 exp.Bracket, 6257 this=this, 6258 expressions=expressions, 6259 comments=this.pop_comments(), 6260 ) 6261 6262 self._add_comments(this) 6263 return self._parse_bracket(this) 6264 6265 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6266 if self._match(TokenType.COLON): 6267 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6268 return this 6269 6270 def _parse_case(self) -> t.Optional[exp.Expression]: 6271 ifs = [] 6272 default = None 6273 6274 comments = self._prev_comments 6275 expression = self._parse_assignment() 6276 6277 while self._match(TokenType.WHEN): 6278 this = self._parse_assignment() 6279 self._match(TokenType.THEN) 6280 then = self._parse_assignment() 6281 ifs.append(self.expression(exp.If, this=this, true=then)) 6282 6283 if self._match(TokenType.ELSE): 6284 default = self._parse_assignment() 6285 6286 if not self._match(TokenType.END): 6287 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6288 default = exp.column("interval") 6289 else: 6290 self.raise_error("Expected END after CASE", self._prev) 6291 6292 return self.expression( 6293 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6294 ) 6295 6296 def _parse_if(self) -> t.Optional[exp.Expression]: 6297 if self._match(TokenType.L_PAREN): 6298 args = self._parse_csv( 6299 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6300 ) 6301 this = 
self.validate_expression(exp.If.from_arg_list(args), args) 6302 self._match_r_paren() 6303 else: 6304 index = self._index - 1 6305 6306 if self.NO_PAREN_IF_COMMANDS and index == 0: 6307 return self._parse_as_command(self._prev) 6308 6309 condition = self._parse_assignment() 6310 6311 if not condition: 6312 self._retreat(index) 6313 return None 6314 6315 self._match(TokenType.THEN) 6316 true = self._parse_assignment() 6317 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6318 self._match(TokenType.END) 6319 this = self.expression(exp.If, this=condition, true=true, false=false) 6320 6321 return this 6322 6323 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6324 if not self._match_text_seq("VALUE", "FOR"): 6325 self._retreat(self._index - 1) 6326 return None 6327 6328 return self.expression( 6329 exp.NextValueFor, 6330 this=self._parse_column(), 6331 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6332 ) 6333 6334 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6335 this = self._parse_function() or self._parse_var_or_string(upper=True) 6336 6337 if self._match(TokenType.FROM): 6338 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6339 6340 if not self._match(TokenType.COMMA): 6341 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6342 6343 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6344 6345 def _parse_gap_fill(self) -> exp.GapFill: 6346 self._match(TokenType.TABLE) 6347 this = self._parse_table() 6348 6349 self._match(TokenType.COMMA) 6350 args = [this, *self._parse_csv(self._parse_lambda)] 6351 6352 gap_fill = exp.GapFill.from_arg_list(args) 6353 return self.validate_expression(gap_fill, args) 6354 6355 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6356 this = self._parse_assignment() 6357 6358 if not self._match(TokenType.ALIAS): 6359 if self._match(TokenType.COMMA): 6360 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6361 6362 self.raise_error("Expected AS after CAST") 6363 6364 fmt = None 6365 to = self._parse_types() 6366 6367 default = self._match(TokenType.DEFAULT) 6368 if default: 6369 default = self._parse_bitwise() 6370 self._match_text_seq("ON", "CONVERSION", "ERROR") 6371 6372 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6373 fmt_string = self._parse_string() 6374 fmt = self._parse_at_time_zone(fmt_string) 6375 6376 if not to: 6377 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6378 if to.this in exp.DataType.TEMPORAL_TYPES: 6379 this = self.expression( 6380 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6381 this=this, 6382 format=exp.Literal.string( 6383 format_time( 6384 fmt_string.this if fmt_string else "", 6385 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6386 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6387 ) 6388 ), 6389 safe=safe, 6390 ) 6391 6392 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6393 this.set("zone", fmt.args["zone"]) 6394 return this 6395 elif not to: 6396 self.raise_error("Expected TYPE after CAST") 6397 elif isinstance(to, exp.Identifier): 6398 to = exp.DataType.build(to.name, udt=True) 6399 elif to.this == exp.DataType.Type.CHAR: 6400 if self._match(TokenType.CHARACTER_SET): 6401 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6402 6403 return self.expression( 6404 exp.Cast if strict else exp.TryCast, 6405 
this=this, 6406 to=to, 6407 format=fmt, 6408 safe=safe, 6409 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6410 default=default, 6411 ) 6412 6413 def _parse_string_agg(self) -> exp.GroupConcat: 6414 if self._match(TokenType.DISTINCT): 6415 args: t.List[t.Optional[exp.Expression]] = [ 6416 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6417 ] 6418 if self._match(TokenType.COMMA): 6419 args.extend(self._parse_csv(self._parse_assignment)) 6420 else: 6421 args = self._parse_csv(self._parse_assignment) # type: ignore 6422 6423 if self._match_text_seq("ON", "OVERFLOW"): 6424 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6425 if self._match_text_seq("ERROR"): 6426 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6427 else: 6428 self._match_text_seq("TRUNCATE") 6429 on_overflow = self.expression( 6430 exp.OverflowTruncateBehavior, 6431 this=self._parse_string(), 6432 with_count=( 6433 self._match_text_seq("WITH", "COUNT") 6434 or not self._match_text_seq("WITHOUT", "COUNT") 6435 ), 6436 ) 6437 else: 6438 on_overflow = None 6439 6440 index = self._index 6441 if not self._match(TokenType.R_PAREN) and args: 6442 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6443 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6444 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6445 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6446 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6447 6448 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6449 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6450 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
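        # Hedged example of the two shapes being reconciled (illustrative only):
        #
        #   import sqlglot
        #   sqlglot.parse_one("SELECT STRING_AGG(x, ',' ORDER BY x) FROM t")
        #   sqlglot.parse_one(
        #       "SELECT LISTAGG(x, ',') WITHIN GROUP (ORDER BY x) FROM t", read="trino"
        #   )
        #
        # Both are expected to yield an exp.GroupConcat whose `this` carries the
        # ORDER BY, per the canonicalization described above.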
6451 if not self._match_text_seq("WITHIN", "GROUP"): 6452 self._retreat(index) 6453 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6454 6455 # The corresponding match_r_paren will be called in parse_function (caller) 6456 self._match_l_paren() 6457 6458 return self.expression( 6459 exp.GroupConcat, 6460 this=self._parse_order(this=seq_get(args, 0)), 6461 separator=seq_get(args, 1), 6462 on_overflow=on_overflow, 6463 ) 6464 6465 def _parse_convert( 6466 self, strict: bool, safe: t.Optional[bool] = None 6467 ) -> t.Optional[exp.Expression]: 6468 this = self._parse_bitwise() 6469 6470 if self._match(TokenType.USING): 6471 to: t.Optional[exp.Expression] = self.expression( 6472 exp.CharacterSet, this=self._parse_var() 6473 ) 6474 elif self._match(TokenType.COMMA): 6475 to = self._parse_types() 6476 else: 6477 to = None 6478 6479 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 6480 6481 def _parse_xml_table(self) -> exp.XMLTable: 6482 namespaces = None 6483 passing = None 6484 columns = None 6485 6486 if self._match_text_seq("XMLNAMESPACES", "("): 6487 namespaces = self._parse_xml_namespace() 6488 self._match_text_seq(")", ",") 6489 6490 this = self._parse_string() 6491 6492 if self._match_text_seq("PASSING"): 6493 # The BY VALUE keywords are optional and are provided for semantic clarity 6494 self._match_text_seq("BY", "VALUE") 6495 passing = self._parse_csv(self._parse_column) 6496 6497 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6498 6499 if self._match_text_seq("COLUMNS"): 6500 columns = self._parse_csv(self._parse_field_def) 6501 6502 return self.expression( 6503 exp.XMLTable, 6504 this=this, 6505 namespaces=namespaces, 6506 passing=passing, 6507 columns=columns, 6508 by_ref=by_ref, 6509 ) 6510 6511 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6512 namespaces = [] 6513 6514 while True: 6515 if self._match(TokenType.DEFAULT): 6516 uri = self._parse_string() 6517 else: 6518 uri = self._parse_alias(self._parse_string()) 6519 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6520 if not self._match(TokenType.COMMA): 6521 break 6522 6523 return namespaces 6524 6525 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 6526 """ 6527 There are generally two variants of the DECODE function: 6528 6529 - DECODE(bin, charset) 6530 - DECODE(expression, search, result [, search, result] ... [, default]) 6531 6532 The second variant will always be parsed into a CASE expression. Note that NULL 6533 needs special treatment, since we need to explicitly check for it with `IS NULL`, 6534 instead of relying on pattern matching. 
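        Illustrative sketch (hedged): DECODE(x, 1, 'one', 'other') is expected to
        parse roughly as CASE WHEN x = 1 THEN 'one' ELSE 'other' END, with NULL
        search values compiled to `x IS NULL` checks as described above.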
6535 """ 6536 args = self._parse_csv(self._parse_assignment) 6537 6538 if len(args) < 3: 6539 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6540 6541 expression, *expressions = args 6542 if not expression: 6543 return None 6544 6545 ifs = [] 6546 for search, result in zip(expressions[::2], expressions[1::2]): 6547 if not search or not result: 6548 return None 6549 6550 if isinstance(search, exp.Literal): 6551 ifs.append( 6552 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 6553 ) 6554 elif isinstance(search, exp.Null): 6555 ifs.append( 6556 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 6557 ) 6558 else: 6559 cond = exp.or_( 6560 exp.EQ(this=expression.copy(), expression=search), 6561 exp.and_( 6562 exp.Is(this=expression.copy(), expression=exp.Null()), 6563 exp.Is(this=search.copy(), expression=exp.Null()), 6564 copy=False, 6565 ), 6566 copy=False, 6567 ) 6568 ifs.append(exp.If(this=cond, true=result)) 6569 6570 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6571 6572 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6573 self._match_text_seq("KEY") 6574 key = self._parse_column() 6575 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6576 self._match_text_seq("VALUE") 6577 value = self._parse_bitwise() 6578 6579 if not key and not value: 6580 return None 6581 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6582 6583 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6584 if not this or not self._match_text_seq("FORMAT", "JSON"): 6585 return this 6586 6587 return self.expression(exp.FormatJson, this=this) 6588 6589 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6590 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6591 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6592 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6593 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6594 else: 6595 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6596 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6597 6598 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6599 6600 if not empty and not error and not null: 6601 return None 6602 6603 return self.expression( 6604 exp.OnCondition, 6605 empty=empty, 6606 error=error, 6607 null=null, 6608 ) 6609 6610 def _parse_on_handling( 6611 self, on: str, *values: str 6612 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6613 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6614 for value in values: 6615 if self._match_text_seq(value, "ON", on): 6616 return f"{value} ON {on}" 6617 6618 index = self._index 6619 if self._match(TokenType.DEFAULT): 6620 default_value = self._parse_bitwise() 6621 if self._match_text_seq("ON", on): 6622 return default_value 6623 6624 self._retreat(index) 6625 6626 return None 6627 6628 @t.overload 6629 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6630 6631 @t.overload 6632 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
6633 6634 def _parse_json_object(self, agg=False): 6635 star = self._parse_star() 6636 expressions = ( 6637 [star] 6638 if star 6639 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6640 ) 6641 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6642 6643 unique_keys = None 6644 if self._match_text_seq("WITH", "UNIQUE"): 6645 unique_keys = True 6646 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6647 unique_keys = False 6648 6649 self._match_text_seq("KEYS") 6650 6651 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6652 self._parse_type() 6653 ) 6654 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6655 6656 return self.expression( 6657 exp.JSONObjectAgg if agg else exp.JSONObject, 6658 expressions=expressions, 6659 null_handling=null_handling, 6660 unique_keys=unique_keys, 6661 return_type=return_type, 6662 encoding=encoding, 6663 ) 6664 6665 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6666 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6667 if not self._match_text_seq("NESTED"): 6668 this = self._parse_id_var() 6669 kind = self._parse_types(allow_identifiers=False) 6670 nested = None 6671 else: 6672 this = None 6673 kind = None 6674 nested = True 6675 6676 path = self._match_text_seq("PATH") and self._parse_string() 6677 nested_schema = nested and self._parse_json_schema() 6678 6679 return self.expression( 6680 exp.JSONColumnDef, 6681 this=this, 6682 kind=kind, 6683 path=path, 6684 nested_schema=nested_schema, 6685 ) 6686 6687 def _parse_json_schema(self) -> exp.JSONSchema: 6688 self._match_text_seq("COLUMNS") 6689 return self.expression( 6690 exp.JSONSchema, 6691 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6692 ) 6693 6694 def _parse_json_table(self) -> exp.JSONTable: 6695 this = self._parse_format_json(self._parse_bitwise()) 6696 path = self._match(TokenType.COMMA) and self._parse_string() 6697 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6698 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6699 schema = self._parse_json_schema() 6700 6701 return exp.JSONTable( 6702 this=this, 6703 schema=schema, 6704 path=path, 6705 error_handling=error_handling, 6706 empty_handling=empty_handling, 6707 ) 6708 6709 def _parse_match_against(self) -> exp.MatchAgainst: 6710 expressions = self._parse_csv(self._parse_column) 6711 6712 self._match_text_seq(")", "AGAINST", "(") 6713 6714 this = self._parse_string() 6715 6716 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6717 modifier = "IN NATURAL LANGUAGE MODE" 6718 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6719 modifier = f"{modifier} WITH QUERY EXPANSION" 6720 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6721 modifier = "IN BOOLEAN MODE" 6722 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6723 modifier = "WITH QUERY EXPANSION" 6724 else: 6725 modifier = None 6726 6727 return self.expression( 6728 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6729 ) 6730 6731 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6732 def _parse_open_json(self) -> exp.OpenJSON: 6733 this = self._parse_bitwise() 6734 path = self._match(TokenType.COMMA) and self._parse_string() 6735 6736 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6737 this = self._parse_field(any_token=True) 6738 kind = self._parse_types() 6739 path = 
self._parse_string() 6740 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6741 6742 return self.expression( 6743 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6744 ) 6745 6746 expressions = None 6747 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6748 self._match_l_paren() 6749 expressions = self._parse_csv(_parse_open_json_column_def) 6750 6751 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6752 6753 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6754 args = self._parse_csv(self._parse_bitwise) 6755 6756 if self._match(TokenType.IN): 6757 return self.expression( 6758 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6759 ) 6760 6761 if haystack_first: 6762 haystack = seq_get(args, 0) 6763 needle = seq_get(args, 1) 6764 else: 6765 haystack = seq_get(args, 1) 6766 needle = seq_get(args, 0) 6767 6768 return self.expression( 6769 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6770 ) 6771 6772 def _parse_predict(self) -> exp.Predict: 6773 self._match_text_seq("MODEL") 6774 this = self._parse_table() 6775 6776 self._match(TokenType.COMMA) 6777 self._match_text_seq("TABLE") 6778 6779 return self.expression( 6780 exp.Predict, 6781 this=this, 6782 expression=self._parse_table(), 6783 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6784 ) 6785 6786 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6787 args = self._parse_csv(self._parse_table) 6788 return exp.JoinHint(this=func_name.upper(), expressions=args) 6789 6790 def _parse_substring(self) -> exp.Substring: 6791 # Postgres supports the form: substring(string [from int] [for int]) 6792 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6793 6794 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6795 6796 if self._match(TokenType.FROM): 6797 args.append(self._parse_bitwise()) 6798 if self._match(TokenType.FOR): 6799 if len(args) == 1: 6800 args.append(exp.Literal.number(1)) 6801 args.append(self._parse_bitwise()) 6802 6803 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6804 6805 def _parse_trim(self) -> exp.Trim: 6806 # https://www.w3resource.com/sql/character-functions/trim.php 6807 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6808 6809 position = None 6810 collation = None 6811 expression = None 6812 6813 if self._match_texts(self.TRIM_TYPES): 6814 position = self._prev.text.upper() 6815 6816 this = self._parse_bitwise() 6817 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6818 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6819 expression = self._parse_bitwise() 6820 6821 if invert_order: 6822 this, expression = expression, this 6823 6824 if self._match(TokenType.COLLATE): 6825 collation = self._parse_bitwise() 6826 6827 return self.expression( 6828 exp.Trim, this=this, position=position, expression=expression, collation=collation 6829 ) 6830 6831 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6832 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6833 6834 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6835 return self._parse_window(self._parse_id_var(), alias=True) 6836 6837 def _parse_respect_or_ignore_nulls( 6838 self, this: t.Optional[exp.Expression] 6839 ) -> t.Optional[exp.Expression]: 6840 if self._match_text_seq("IGNORE", "NULLS"): 
    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this
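    # Usage sketch (illustrative): a trailing IGNORE NULLS wraps the function in
    # exp.IgnoreNulls. Assumes a dialect that supports the syntax, e.g. BigQuery.
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> ast = sqlglot.parse_one(
    #     ...     "SELECT FIRST_VALUE(x IGNORE NULLS) OVER (ORDER BY y) FROM t",
    #     ...     read="bigquery",
    #     ... )
    #     >>> isinstance(ast.find(exp.IgnoreNulls), exp.IgnoreNulls)
    #     True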
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # The SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER,
        # which some dialects choose to implement and others do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The code below handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity:
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # BigQuery allows SELECT ... FROM ... WINDOW x AS (PARTITION BY ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()
            exclude = (
                self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS)
                if self._match_text_seq("EXCLUDE")
                else None
            )

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
                exclude=exclude,
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }
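    # Usage sketch (illustrative): a full OVER clause produces exp.Window with a
    # nested exp.WindowSpec holding the frame boundaries parsed above.
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> ast = sqlglot.parse_one(
    #     ...     "SELECT SUM(x) OVER (PARTITION BY y ORDER BY z "
    #     ...     "ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM t"
    #     ... )
    #     >>> ast.find(exp.WindowSpec).args["start"]
    #     'UNBOUNDED'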
    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses),
        # so this section tries to parse the clause version and, if that fails, treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return this

        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            quoted = self._prev.token_type == TokenType.STRING
            expression = self._identifier_expression(quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)
        if output:
            output.update_positions(self._prev)
        return output

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self._identifier_expression(quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_string() or self._parse_var(any_token=True, upper=upper)

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()
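    # Usage sketch (illustrative): _parse_alias yields exp.Alias nodes for
    # aliased projections.
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> projection = sqlglot.parse_one("SELECT 1 AS x").expressions[0]
    #     >>> isinstance(projection, exp.Alias), projection.alias
    #     (True, 'x')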
    def _parse_parameter(self) -> exp.Parameter:
        this = self._parse_identifier() or self._parse_primary_or_var()
        return self.expression(exp.Parameter, this=this)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        if not self._match_texts(keywords):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_expression()
        return [expression] if expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_alias(self._parse_assignment(), explicit=True)
            if alias
            else self._parse_assignment()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)
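    # Usage sketch (illustrative): the _parse_csv helper above drives every
    # comma-separated list, e.g. the projection list of a SELECT.
    #
    #     >>> import sqlglot
    #     >>> len(sqlglot.parse_one("SELECT a, b, c FROM t").expressions)
    #     3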
    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.ColumnDef]:
        if self._prev.text.upper() != "ADD":
            return None

        start = self._index
        self._match(TokenType.COLUMN)

        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if not isinstance(expression, exp.ColumnDef):
            self._retreat(start)
            return None

        expression.set("exists", exists_column)

        # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
        if self._match_texts(("FIRST", "AFTER")):
            position = self._prev.text
            column_position = self.expression(
                exp.ColumnPosition, this=self._parse_column(), position=position
            )
            expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        def _parse_add_alteration() -> t.Optional[exp.Expression]:
            self._match_text_seq("ADD")
            if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
                return self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )

            column_def = self._parse_add_column()
            if isinstance(column_def, exp.ColumnDef):
                return column_def

            exists = self._parse_exists(not_=True)
            if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False):
                return self.expression(
                    exp.AddPartition, exists=exists, this=self._parse_field(any_token=True)
                )

            return None

        if not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN or self._match_text_seq(
            "COLUMNS"
        ):
            schema = self._parse_schema()

            return ensure_list(schema) if schema else self._parse_csv(self._parse_field_def)

        return self._parse_csv(_parse_add_alteration)
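    # Usage sketch (illustrative): ALTER TABLE ... ADD COLUMN parses into an
    # exp.Alter whose actions hold exp.ColumnDef nodes.
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> alter = sqlglot.parse_one("ALTER TABLE t ADD COLUMN c INT")
    #     >>> isinstance(alter.args["actions"][0], exp.ColumnDef)
    #     True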
    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )

        if self._match_text_seq("SET", "VISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="VISIBLE")
        if self._match_text_seq("SET", "INVISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE")

        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )

    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]:
        if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN:
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.AlterRename, this=self._parse_table(schema=True))
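    # Usage sketch (illustrative): the ALTER COLUMN branches above map directly
    # onto exp.AlterColumn args, e.g. SET NOT NULL sets allow_null=False.
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> ast = sqlglot.parse_one("ALTER TABLE t ALTER COLUMN c SET NOT NULL")
    #     >>> ast.find(exp.AlterColumn).args["allow_null"]
    #     False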
"UNLOGGED")): 7403 alter_set.set("option", exp.var(self._prev.text.upper())) 7404 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7405 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7406 elif self._match_text_seq("LOCATION"): 7407 alter_set.set("location", self._parse_field()) 7408 elif self._match_text_seq("ACCESS", "METHOD"): 7409 alter_set.set("access_method", self._parse_field()) 7410 elif self._match_text_seq("TABLESPACE"): 7411 alter_set.set("tablespace", self._parse_field()) 7412 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7413 alter_set.set("file_format", [self._parse_field()]) 7414 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7415 alter_set.set("file_format", self._parse_wrapped_options()) 7416 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7417 alter_set.set("copy_options", self._parse_wrapped_options()) 7418 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7419 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7420 else: 7421 if self._match_text_seq("SERDE"): 7422 alter_set.set("serde", self._parse_field()) 7423 7424 properties = self._parse_wrapped(self._parse_properties, optional=True) 7425 alter_set.set("expressions", [properties]) 7426 7427 return alter_set 7428 7429 def _parse_alter(self) -> exp.Alter | exp.Command: 7430 start = self._prev 7431 7432 alter_token = self._match_set(self.ALTERABLES) and self._prev 7433 if not alter_token: 7434 return self._parse_as_command(start) 7435 7436 exists = self._parse_exists() 7437 only = self._match_text_seq("ONLY") 7438 this = self._parse_table(schema=True) 7439 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7440 7441 if self._next: 7442 self._advance() 7443 7444 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7445 if parser: 7446 actions = ensure_list(parser(self)) 7447 not_valid = self._match_text_seq("NOT", "VALID") 7448 options = self._parse_csv(self._parse_property) 7449 7450 if not self._curr and actions: 7451 return self.expression( 7452 exp.Alter, 7453 this=this, 7454 kind=alter_token.text.upper(), 7455 exists=exists, 7456 actions=actions, 7457 only=only, 7458 options=options, 7459 cluster=cluster, 7460 not_valid=not_valid, 7461 ) 7462 7463 return self._parse_as_command(start) 7464 7465 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7466 start = self._prev 7467 # https://duckdb.org/docs/sql/statements/analyze 7468 if not self._curr: 7469 return self.expression(exp.Analyze) 7470 7471 options = [] 7472 while self._match_texts(self.ANALYZE_STYLES): 7473 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7474 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7475 else: 7476 options.append(self._prev.text.upper()) 7477 7478 this: t.Optional[exp.Expression] = None 7479 inner_expression: t.Optional[exp.Expression] = None 7480 7481 kind = self._curr and self._curr.text.upper() 7482 7483 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7484 this = self._parse_table_parts() 7485 elif self._match_text_seq("TABLES"): 7486 if self._match_set((TokenType.FROM, TokenType.IN)): 7487 kind = f"{kind} {self._prev.text.upper()}" 7488 this = self._parse_table(schema=True, is_db_reference=True) 7489 elif self._match_text_seq("DATABASE"): 7490 this = self._parse_table(schema=True, is_db_reference=True) 7491 elif self._match_text_seq("CLUSTER"): 7492 this = self._parse_table() 7493 # Try matching inner expr keywords before 
    def _parse_analyze(self) -> exp.Analyze | exp.Command:
        start = self._prev
        # https://duckdb.org/docs/sql/statements/analyze
        if not self._curr:
            return self.expression(exp.Analyze)

        options = []
        while self._match_texts(self.ANALYZE_STYLES):
            if self._prev.text.upper() == "BUFFER_USAGE_LIMIT":
                options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}")
            else:
                options.append(self._prev.text.upper())

        this: t.Optional[exp.Expression] = None
        inner_expression: t.Optional[exp.Expression] = None

        kind = self._curr and self._curr.text.upper()

        if self._match(TokenType.TABLE) or self._match(TokenType.INDEX):
            this = self._parse_table_parts()
        elif self._match_text_seq("TABLES"):
            if self._match_set((TokenType.FROM, TokenType.IN)):
                kind = f"{kind} {self._prev.text.upper()}"
                this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("DATABASE"):
            this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("CLUSTER"):
            this = self._parse_table()
        # Try matching inner expression keywords before falling back to parsing a table
        elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            kind = None
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)
        else:
            # Empty kind: https://prestodb.io/docs/current/sql/analyze.html
            kind = None
            this = self._parse_table_parts()

        partition = self._try_parse(self._parse_partition)
        if not partition and self._match_texts(self.PARTITION_KEYWORDS):
            return self._parse_as_command(start)

        # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
        if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq(
            "WITH", "ASYNC", "MODE"
        ):
            mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE"
        else:
            mode = None

        if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)

        properties = self._parse_properties()
        return self.expression(
            exp.Analyze,
            kind=kind,
            this=this,
            mode=mode,
            partition=partition,
            properties=properties,
            expression=inner_expression,
            options=options,
        )

    # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html
    def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics:
        this = None
        kind = self._prev.text.upper()
        option = self._prev.text.upper() if self._match_text_seq("DELTA") else None
        expressions = []

        if not self._match_text_seq("STATISTICS"):
            self.raise_error("Expecting token STATISTICS")

        if self._match_text_seq("NOSCAN"):
            this = "NOSCAN"
        elif self._match(TokenType.FOR):
            if self._match_text_seq("ALL", "COLUMNS"):
                this = "FOR ALL COLUMNS"
            if self._match_texts("COLUMNS"):
                this = "FOR COLUMNS"
                expressions = self._parse_csv(self._parse_column_reference)
        elif self._match_text_seq("SAMPLE"):
            sample = self._parse_number()
            expressions = [
                self.expression(
                    exp.AnalyzeSample,
                    sample=sample,
                    kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None,
                )
            ]

        return self.expression(
            exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions
        )

    # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html
    def _parse_analyze_validate(self) -> exp.AnalyzeValidate:
        kind = None
        this = None
        expression: t.Optional[exp.Expression] = None
        if self._match_text_seq("REF", "UPDATE"):
            kind = "REF"
            this = "UPDATE"
            if self._match_text_seq("SET", "DANGLING", "TO", "NULL"):
                this = "UPDATE SET DANGLING TO NULL"
        elif self._match_text_seq("STRUCTURE"):
            kind = "STRUCTURE"
            if self._match_text_seq("CASCADE", "FAST"):
                this = "CASCADE FAST"
            elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts(
                ("ONLINE", "OFFLINE")
            ):
                this = f"CASCADE COMPLETE {self._prev.text.upper()}"
                expression = self._parse_into()

        return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression)

    def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]:
        this = self._prev.text.upper()
        if self._match_text_seq("COLUMNS"):
            return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}")
        return None
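    # Usage sketch (illustrative): assuming a dialect with ANALYZE support such
    # as Spark, computing table statistics yields exp.Analyze wrapping an
    # exp.AnalyzeStatistics node.
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> ast = sqlglot.parse_one("ANALYZE TABLE t COMPUTE STATISTICS", read="spark")
    #     >>> isinstance(ast.find(exp.AnalyzeStatistics), exp.AnalyzeStatistics)
    #     True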
    def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]:
        kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None
        if self._match_text_seq("STATISTICS"):
            return self.expression(exp.AnalyzeDelete, kind=kind)
        return None

    def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]:
        if self._match_text_seq("CHAINED", "ROWS"):
            return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into())
        return None

    # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html
    def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram:
        this = self._prev.text.upper()
        expression: t.Optional[exp.Expression] = None
        expressions = []
        update_options = None

        if self._match_text_seq("HISTOGRAM", "ON"):
            expressions = self._parse_csv(self._parse_column_reference)
            with_expressions = []
            while self._match(TokenType.WITH):
                # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
                if self._match_texts(("SYNC", "ASYNC")):
                    if self._match_text_seq("MODE", advance=False):
                        with_expressions.append(f"{self._prev.text.upper()} MODE")
                        self._advance()
                else:
                    buckets = self._parse_number()
                    if self._match_text_seq("BUCKETS"):
                        with_expressions.append(f"{buckets} BUCKETS")
            if with_expressions:
                expression = self.expression(exp.AnalyzeWith, expressions=with_expressions)

            if self._match_texts(("MANUAL", "AUTO")) and self._match(
                TokenType.UPDATE, advance=False
            ):
                update_options = self._prev.text.upper()
                self._advance()
            elif self._match_text_seq("USING", "DATA"):
                expression = self.expression(exp.UsingData, this=self._parse_string())

        return self.expression(
            exp.AnalyzeHistogram,
            this=this,
            expressions=expressions,
            expression=expression,
            update_options=update_options,
        )

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            whens=self._parse_when_matched(),
            returning=self._parse_returning(),
        )
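    # Usage sketch (illustrative): a MERGE statement parses into exp.Merge with
    # its WHEN branches collected by _parse_when_matched below.
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> merge = sqlglot.parse_one(
    #     ...     "MERGE INTO t USING s ON t.id = s.id "
    #     ...     "WHEN MATCHED THEN UPDATE SET t.v = s.v"
    #     ... )
    #     >>> isinstance(merge, exp.Merge)
    #     True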
    def _parse_when_matched(self) -> exp.Whens:
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                this = self._parse_star()
                if this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=exp.var("ROW")
                        if self._match_text_seq("ROW")
                        else self._parse_value(values=False),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = self._parse_var_from_options(self.CONFLICT_ACTIONS)

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return self.expression(exp.Whens, expressions=whens)

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        if isinstance(right, (exp.Column, exp.Identifier)):
            right = exp.var(right.name)

        this = self.expression(exp.EQ, this=left, expression=right)
        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)
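    # Usage sketch (illustrative): SET statements become exp.Set containing
    # exp.SetItem nodes whose `this` is an exp.EQ assignment.
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> isinstance(sqlglot.parse_one("SET x = 1").find(exp.SetItem), exp.SetItem)
    #     True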
    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()
                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None
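    # Usage sketch (illustrative): dollar-quoted strings (e.g. in Postgres) are
    # parsed by _parse_heredoc into exp.Heredoc nodes.
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> ast = sqlglot.parse_one("SELECT $tag$hello$tag$", read="postgres")
    #     >>> isinstance(ast.find(exp.Heredoc), exp.Heredoc)
    #     True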
    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if (
            self._curr
            and self._curr.token_type != TokenType.STRING
            and self._curr.text.upper() in texts
        ):
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if (
                self._curr
                and self._curr.token_type != TokenType.STRING
                and self._curr.text.upper() == text
            ):
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
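    # Usage sketch (illustrative): _replace_lambda rewrites lambda parameter
    # references; assuming a dialect with lambda syntax such as DuckDB, the
    # arrow form yields an exp.Lambda node.
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> ast = sqlglot.parse_one("SELECT LIST_TRANSFORM([1, 2], x -> x + 1)", read="duckdb")
    #     >>> isinstance(ast.find(exp.Lambda), exp.Lambda)
    #     True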
    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        start = self._prev

        # Not to be confused with the TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)

    def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]:
        self._match(TokenType.EQ)
        self._match(TokenType.L_PAREN)

        opts: t.List[t.Optional[exp.Expression]] = []
        option: exp.Expression | None
        while self._curr and not self._match(TokenType.R_PAREN):
            if self._match_text_seq("FORMAT_NAME", "="):
                # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL
                option = self._parse_format_name()
            else:
                option = self._parse_property()

            if option is None:
                self.raise_error("Unable to parse option")
                break

            opts.append(option)

        return opts

    def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
        sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

        options = []
        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
            option = self._parse_var(any_token=True)
            prev = self._prev.text.upper()

            # Different dialects might separate options and values by white space, "=" and "AS"
            self._match(TokenType.EQ)
            self._match(TokenType.ALIAS)

            param = self.expression(exp.CopyParameter, this=option)

            if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
                TokenType.L_PAREN, advance=False
            ):
                # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
                param.set("expressions", self._parse_wrapped_options())
            elif prev == "FILE_FORMAT":
                # T-SQL's external file format case
                param.set("expression", self._parse_field())
            else:
                param.set("expression", self._parse_unquoted_field())

            options.append(param)
            self._match(sep)

        return options

    def _parse_credentials(self) -> t.Optional[exp.Credentials]:
        expr = self.expression(exp.Credentials)

        if self._match_text_seq("STORAGE_INTEGRATION", "="):
            expr.set("storage", self._parse_field())
        if self._match_text_seq("CREDENTIALS"):
            # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
            creds = (
                self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
            )
            expr.set("credentials", creds)
        if self._match_text_seq("ENCRYPTION"):
            expr.set("encryption", self._parse_wrapped_options())
        if self._match_text_seq("IAM_ROLE"):
            expr.set("iam_role", self._parse_field())
        if self._match_text_seq("REGION"):
            expr.set("region", self._parse_field())

        return expr

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        return self._parse_field()
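    # Usage sketch (illustrative): TRUNCATE TABLE accepts a comma-separated list
    # of tables, collected with _parse_csv above.
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> trunc = sqlglot.parse_one("TRUNCATE TABLE t1, t2")
    #     >>> isinstance(trunc, exp.TruncateTable), len(trunc.expressions)
    #     (True, 2)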
    def _parse_copy(self) -> exp.Copy | exp.Command:
        start = self._prev

        self._match(TokenType.INTO)

        this = (
            self._parse_select(nested=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table(schema=True)
        )

        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_file_location)
        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )

    def _parse_normalize(self) -> exp.Normalize:
        return self.expression(
            exp.Normalize,
            this=self._parse_bitwise(),
            form=self._match(TokenType.COMMA) and self._parse_var(),
        )

    def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor:
        args = self._parse_csv(lambda: self._parse_lambda())

        this = seq_get(args, 0)
        decimals = seq_get(args, 1)

        return expr_type(
            this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var()
        )

    def _parse_star_ops(self) -> t.Optional[exp.Expression]:
        star_token = self._prev

        if self._match_text_seq("COLUMNS", "(", advance=False):
            this = self._parse_function()
            if isinstance(this, exp.Columns):
                this.set("unpack", True)
            return this

        return self.expression(
            exp.Star,
            **{  # type: ignore
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        ).update_positions(star_token)

    def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]:
        privilege_parts = []

        # Keep consuming consecutive keywords until a comma (end of this privilege), ON
        # (end of the privilege list) or L_PAREN (start of a column list) is met
        while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False):
            privilege_parts.append(self._curr.text.upper())
            self._advance()

        this = exp.var(" ".join(privilege_parts))
        expressions = (
            self._parse_wrapped_csv(self._parse_column)
            if self._match(TokenType.L_PAREN, advance=False)
            else None
        )

        return self.expression(exp.GrantPrivilege, this=this, expressions=expressions)

    def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]:
        kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper()
        principal = self._parse_id_var()

        if not principal:
            return None

        return self.expression(exp.GrantPrincipal, this=principal, kind=kind)
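    # Usage sketch (illustrative): Postgres' NORMALIZE function maps onto
    # exp.Normalize, with the optional form captured after the comma.
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> ast = sqlglot.parse_one("SELECT NORMALIZE(s, NFC)", read="postgres")
    #     >>> isinstance(ast.find(exp.Normalize), exp.Normalize)
    #     True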
    def _parse_grant(self) -> exp.Grant | exp.Command:
        start = self._prev

        privileges = self._parse_csv(self._parse_grant_privilege)

        self._match(TokenType.ON)
        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()

        # Attempt to parse the securable, e.g. MySQL allows names
        # such as "foo.*" and "*.*", which are not easily parseable yet
        securable = self._try_parse(self._parse_table_parts)

        if not securable or not self._match_text_seq("TO"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        grant_option = self._match_text_seq("WITH", "GRANT", "OPTION")

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Grant,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
        )

    def _parse_overlay(self) -> exp.Overlay:
        return self.expression(
            exp.Overlay,
            **{  # type: ignore
                "this": self._parse_bitwise(),
                "expression": self._match_text_seq("PLACING") and self._parse_bitwise(),
                "from": self._match_text_seq("FROM") and self._parse_bitwise(),
                "for": self._match_text_seq("FOR") and self._parse_bitwise(),
            },
        )

    def _parse_format_name(self) -> exp.Property:
        # Note: Although not specified in the docs, Snowflake does accept a string/identifier
        # for FILE_FORMAT = <format_name>
        return self.expression(
            exp.Property,
            this=exp.var("FORMAT_NAME"),
            value=self._parse_string() or self._parse_table_parts(),
        )

    def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc:
        args: t.List[exp.Expression] = []

        if self._match(TokenType.DISTINCT):
            args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()]))
            self._match(TokenType.COMMA)

        args.extend(self._parse_csv(self._parse_assignment))

        return self.expression(
            expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2)
        )

    def _identifier_expression(
        self, token: t.Optional[Token] = None, **kwargs: t.Any
    ) -> exp.Identifier:
        token = token or self._prev
        expression = self.expression(exp.Identifier, this=token.text, **kwargs)
        expression.update_positions(token)
        return expression

    def _build_pipe_cte(self, query: exp.Query, expressions: t.List[exp.Expression]) -> exp.Query:
        if query.selects:
            self._pipe_cte_counter += 1
            new_cte = f"__tmp{self._pipe_cte_counter}"

            # For `exp.Select`, generated CTEs are attached to its `with`.
            # For `exp.SetOperation`, generated CTEs are attached to the `with` of its LHS, accessed via `this`
            with_ = (
                query.args.get("with")
                if isinstance(query, exp.Select)
                else query.this.args.get("with")
            )
            ctes = with_.pop() if with_ else None

            new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False)
            if ctes:
                new_select.set("with", ctes)

            return new_select.with_(new_cte, as_=query, copy=False)

        return query.select(*expressions, copy=False)

    def _parse_pipe_syntax_select(self, query: exp.Query) -> exp.Query:
        select = self._parse_select()
        if isinstance(select, exp.Select):
            return self._build_pipe_cte(query, select.expressions)

        return query

    def _parse_pipe_syntax_where(self, query: exp.Query) -> exp.Query:
        where = self._parse_where()
        return query.where(where, copy=False)
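    # Usage sketch (illustrative): assuming a dialect with GRANT support, a
    # fully parseable statement produces exp.Grant; anything that cannot be
    # parsed falls back to exp.Command via _parse_as_command.
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> grant = sqlglot.parse_one("GRANT SELECT ON TABLE t TO ROLE r", read="snowflake")
    #     >>> isinstance(grant, exp.Grant)
    #     True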
    def _parse_pipe_syntax_limit(self, query: exp.Query) -> exp.Query:
        limit = self._parse_limit()
        offset = self._parse_offset()
        if limit:
            curr_limit = query.args.get("limit", limit)
            if curr_limit.expression.to_py() >= limit.expression.to_py():
                query.limit(limit, copy=False)
        if offset:
            curr_offset = query.args.get("offset")
            curr_offset = curr_offset.expression.to_py() if curr_offset else 0
            query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False)
        return query

    def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]:
        this = self._parse_assignment()
        if self._match_text_seq("GROUP", "AND", advance=False):
            return this

        this = self._parse_alias(this)

        if self._match_set((TokenType.ASC, TokenType.DESC), advance=False):
            return self._parse_ordered(lambda: this)

        return this

    def _parse_pipe_syntax_aggregate_group_order_by(
        self, query: exp.Query, group_by_exists: bool = True
    ) -> exp.Query:
        expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields)
        aggregates_or_groups, orders = [], []
        for element in expr:
            if isinstance(element, exp.Ordered):
                this = element.this
                if isinstance(this, exp.Alias):
                    element.set("this", this.args["alias"])
                orders.append(element)
            else:
                this = element
            aggregates_or_groups.append(this)

        if group_by_exists and isinstance(query, exp.Select):
            query = query.select(*aggregates_or_groups, copy=False).group_by(
                *[projection.args.get("alias", projection) for projection in aggregates_or_groups],
                copy=False,
            )
        else:
            query = query.select(*aggregates_or_groups, append=False, copy=False)

        if orders:
            return query.order_by(*orders, append=False, copy=False)

        return query

    def _parse_pipe_syntax_aggregate(self, query: exp.Query) -> exp.Query:
        self._match_text_seq("AGGREGATE")
        query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False)

        if self._match(TokenType.GROUP_BY) or (
            self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY)
        ):
            return self._parse_pipe_syntax_aggregate_group_order_by(query)

        return query

    def _parse_pipe_syntax_set_operator(
        self, query: t.Optional[exp.Query]
    ) -> t.Optional[exp.Query]:
        first_setop = self.parse_set_operation(this=query)

        if not first_setop or not query:
            return None

        if not query.selects:
            query.select("*", copy=False)

        this = first_setop.this.pop()
        distinct = first_setop.args.pop("distinct")
        setops = [first_setop.expression.pop(), *self._parse_expressions()]

        if isinstance(first_setop, exp.Union):
            query = query.union(*setops, distinct=distinct, copy=False, **first_setop.args)
        elif isinstance(first_setop, exp.Except):
            query = query.except_(*setops, distinct=distinct, copy=False, **first_setop.args)
        else:
            query = query.intersect(*setops, distinct=distinct, copy=False, **first_setop.args)

        return self._build_pipe_cte(
            query, [projection.args.get("alias", projection) for projection in this.expressions]
        )
    def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]:
        while self._match(TokenType.PIPE_GT):
            start = self._curr
            parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper())
            if not parser:
                set_op_query = self._parse_pipe_syntax_set_operator(query)
                if not set_op_query:
                    self._retreat(start)
                    self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.")
                    break

                query = set_op_query
            else:
                query = parser(self, query)

        if query and not query.selects:
            return query.select("*", copy=False)

        return query
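    # Usage sketch (illustrative): in recent sqlglot versions, BigQuery's pipe
    # syntax is normalized by the _parse_pipe_syntax_* methods above into a
    # regular exp.Select, introducing CTEs only where needed.
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> ast = sqlglot.parse_one("FROM t |> WHERE x > 1 |> SELECT x", read="bigquery")
    #     >>> isinstance(ast, exp.Select)
    #     True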
def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)
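Since the wrapping above only fires for binary operands, a quick illustrative check (default dialect; the exact output string may vary slightly by version):

import sqlglot

# MOD(a + 1, 7) is rewritten to (a + 1) % 7, so the modulo binds correctly.
print(sqlglot.transpile("SELECT MOD(a + 1, 7)")[0])  # SELECT (a + 1) % 7
print(sqlglot.transpile("SELECT MOD(a, 7)")[0])      # SELECT a % 7 (no parens needed)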
def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp
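A hedged, direct-call sketch of the constructor above; DuckDB is used only as an example dialect here, and whether bracket_notation gets recorded depends on that dialect's HAS_DISTINCT_ARRAY_CONSTRUCTORS flag:

from sqlglot import exp
from sqlglot.dialects import Dialect
from sqlglot.parser import build_array_constructor
from sqlglot.tokens import TokenType

arr = build_array_constructor(
    exp.Array,
    [exp.Literal.number(1), exp.Literal.number(2)],
    TokenType.L_BRACKET,  # i.e. the literal was written as [1, 2]
    Dialect.get_or_raise("duckdb"),
)
# True only if the dialect distinguishes ARRAY(...) from [...] constructors
print(arr.args.get("bracket_notation"))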
def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)
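The two-argument branch above is easiest to see with a direct call; the timezone and column names below are arbitrary test values:

from sqlglot import exp
from sqlglot.parser import build_convert_timezone

# CONVERT_TIMEZONE(target_tz, timestamp) with a dialect-supplied default source.
node = build_convert_timezone(
    [exp.Literal.string("America/New_York"), exp.column("created_at")],
    default_source_tz="UTC",
)
print(node.args["source_tz"])  # the injected 'UTC' literal, since only two args were given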
class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "CHAR": lambda args: exp.Chr(expressions=args),
        "CHR": lambda args: exp.Chr(expressions=args),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "STRPOS": exp.StrPosition.from_arg_list,
        "CHARINDEX": lambda args: build_locate_strposition(args),
        "INSTR": exp.StrPosition.from_arg_list,
        "LOCATE": lambda args: build_locate_strposition(args),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.DYNAMIC,
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.UDOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.BLOB,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME2,
        TokenType.DATETIME64,
        TokenType.SMALLDATETIME,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.DECIMAL256,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.POINT,
        TokenType.RING,
        TokenType.LINESTRING,
        TokenType.MULTILINESTRING,
        TokenType.POLYGON,
        TokenType.MULTIPOLYGON,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.VOID,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NOTHING,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        TokenType.DYNAMIC,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
        TokenType.DOUBLE: TokenType.UDOUBLE,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.FILE_FORMAT,
        TokenType.MODEL,
        TokenType.NAMESPACE,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.SINK,
        TokenType.SOURCE,
        TokenType.STAGE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.STREAMLIT,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.ATTACH,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.CURRENT_SCHEMA,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DETACH,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.EXPORT,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.GET,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LIMIT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.PUT,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
    ID_VAR_TOKENS.remove(TokenType.UNION)

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_SCHEMA,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GET,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPNTZ,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DOTCOLON: lambda self, this, to: self.expression(
            exp.JSONCast,
            this=this,
            to=to,
        ),
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Hint: lambda self: self._parse_hint_body(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Tuple: lambda self: self._parse_value(values=False),
        exp.Whens: lambda self: self._parse_when_matched(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.ANALYZE: lambda self: self._parse_analyze(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self._parse_use(),
        TokenType.SEMICOLON: lambda self: exp.Semicolon(),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(
            exp.HexString,
            this=token.text,
            is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None,
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.COLON_PLACEHOLDER_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    PIPE_SYNTAX_TRANSFORM_PARSERS = {
        "SELECT": lambda self, query: self._parse_pipe_syntax_select(query),
        "WHERE": lambda self, query: self._parse_pipe_syntax_where(query),
        "ORDER BY": lambda self, query: query.order_by(
            self._parse_order(), append=False, copy=False
        ),
        "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query),
        "OFFSET": lambda self, query: query.offset(self._parse_offset(), copy=False),
        "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query),
    }

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
        "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EMPTY": lambda self: self.expression(exp.EmptyProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "ENVIRONMENT": lambda self: self.expression(
            exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment)
        ),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SECURITY": lambda self: self._parse_security(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WATERMARK": lambda self: self.expression(
            exp.WatermarkColumnConstraint,
            this=self._match(TokenType.FOR) and self._parse_column(),
            expression=self._match(TokenType.ALIAS) and self._parse_disjunction(),
        ),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
        "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
        "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
    }

    def _parse_partitioned_by_bucket_or_truncate(self) -> exp.Expression:
        klass = (
            exp.PartitionedByBucket
            if self._prev.text.upper() == "BUCKET"
            else exp.PartitionByTruncate
        )

        args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column())
        this, expression = seq_get(args, 0), seq_get(args, 1)

        if isinstance(this, exp.Literal):
            # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order
            # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)`
            # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)`
            # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)`
            #
            # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning
            # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
            this, expression = expression, this

        return self.expression(klass, this=this, expression=expression)

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "SWAP": lambda self: self.expression(
            exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
        ),
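        # Note: the SWAP entry above implements Snowflake-style
        # "ALTER TABLE <t> SWAP WITH <other>"; the WITH keyword is consumed
        # before the target table is parsed.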
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
        "WATERMARK",
        "BUCKET",
        "TRUNCATE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names()
        },
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names()
        },
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "NORMALIZE": lambda self: self._parse_normalize(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "OVERLAY": lambda self: self._parse_overlay(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
        "XMLELEMENT": lambda self: self.expression(
            exp.XMLElement,
            this=self._match_text_seq("NAME") and self._parse_id_var(),
            expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression),
        ),
        "XMLTABLE": lambda self: self._parse_xml_table(),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

    EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        "USING": (
            "BTREE",
            "HASH",
        ),
        **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()),
    }

    WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = {
        "NO": ("OTHERS",),
        "CURRENT": ("ROW",),
        **dict.fromkeys(("GROUP", "TIES"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    # The style options for the ANALYZE statement
    ANALYZE_STYLES = {
        "BUFFER_USAGE_LIMIT",
        "FULL",
        "LOCAL",
        "NO_WRITE_TO_BINLOG",
        "SAMPLE",
        "SKIP_LOCKED",
        "VERBOSE",
    }

    ANALYZE_EXPRESSION_PARSERS = {
        "ALL": lambda self: self._parse_analyze_columns(),
        "COMPUTE": lambda self: self._parse_analyze_statistics(),
        "DELETE": lambda self: self._parse_analyze_delete(),
        "DROP": lambda self: self._parse_analyze_histogram(),
        "ESTIMATE": lambda self: self._parse_analyze_statistics(),
        "LIST": lambda self: self._parse_analyze_list(),
        "PREDICATE": lambda self: self._parse_analyze_columns(),
        "UPDATE": lambda self: self._parse_analyze_histogram(),
        "VALIDATE": lambda self: self._parse_analyze_validate(),
    }

    PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}

    AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

    OPERATION_MODIFIERS: t.Set[str] = set()

    RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"}

    MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows)

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = True

    # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword
    ALTER_RENAME_REQUIRES_COLUMN = True

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_pipe_cte_counter",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None
        self._pipe_cte_counter = 0

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
1573 """ 1574 return self._parse( 1575 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1576 ) 1577 1578 def parse_into( 1579 self, 1580 expression_types: exp.IntoType, 1581 raw_tokens: t.List[Token], 1582 sql: t.Optional[str] = None, 1583 ) -> t.List[t.Optional[exp.Expression]]: 1584 """ 1585 Parses a list of tokens into a given Expression type. If a collection of Expression 1586 types is given instead, this method will try to parse the token list into each one 1587 of them, stopping at the first for which the parsing succeeds. 1588 1589 Args: 1590 expression_types: The expression type(s) to try and parse the token list into. 1591 raw_tokens: The list of tokens. 1592 sql: The original SQL string, used to produce helpful debug messages. 1593 1594 Returns: 1595 The target Expression. 1596 """ 1597 errors = [] 1598 for expression_type in ensure_list(expression_types): 1599 parser = self.EXPRESSION_PARSERS.get(expression_type) 1600 if not parser: 1601 raise TypeError(f"No parser registered for {expression_type}") 1602 1603 try: 1604 return self._parse(parser, raw_tokens, sql) 1605 except ParseError as e: 1606 e.errors[0]["into_expression"] = expression_type 1607 errors.append(e) 1608 1609 raise ParseError( 1610 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1611 errors=merge_errors(errors), 1612 ) from errors[-1] 1613 1614 def _parse( 1615 self, 1616 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1617 raw_tokens: t.List[Token], 1618 sql: t.Optional[str] = None, 1619 ) -> t.List[t.Optional[exp.Expression]]: 1620 self.reset() 1621 self.sql = sql or "" 1622 1623 total = len(raw_tokens) 1624 chunks: t.List[t.List[Token]] = [[]] 1625 1626 for i, token in enumerate(raw_tokens): 1627 if token.token_type == TokenType.SEMICOLON: 1628 if token.comments: 1629 chunks.append([token]) 1630 1631 if i < total - 1: 1632 chunks.append([]) 1633 else: 1634 chunks[-1].append(token) 1635 1636 expressions = [] 1637 1638 for tokens in chunks: 1639 self._index = -1 1640 self._tokens = tokens 1641 self._advance() 1642 1643 expressions.append(parse_method(self)) 1644 1645 if self._index < len(self._tokens): 1646 self.raise_error("Invalid expression / Unexpected token") 1647 1648 self.check_errors() 1649 1650 return expressions 1651 1652 def check_errors(self) -> None: 1653 """Logs or raises any found errors, depending on the chosen error level setting.""" 1654 if self.error_level == ErrorLevel.WARN: 1655 for error in self.errors: 1656 logger.error(str(error)) 1657 elif self.error_level == ErrorLevel.RAISE and self.errors: 1658 raise ParseError( 1659 concat_messages(self.errors, self.max_errors), 1660 errors=merge_errors(self.errors), 1661 ) 1662 1663 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1664 """ 1665 Appends an error in the list of recorded errors or raises it, depending on the chosen 1666 error level setting. 1667 """ 1668 token = token or self._curr or self._prev or Token.string("") 1669 start = token.start 1670 end = token.end + 1 1671 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1672 highlight = self.sql[start:end] 1673 end_context = self.sql[end : end + self.error_message_context] 1674 1675 error = ParseError.new( 1676 f"{message}. 
Line {token.line}, Col: {token.col}.\n" 1677 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1678 description=message, 1679 line=token.line, 1680 col=token.col, 1681 start_context=start_context, 1682 highlight=highlight, 1683 end_context=end_context, 1684 ) 1685 1686 if self.error_level == ErrorLevel.IMMEDIATE: 1687 raise error 1688 1689 self.errors.append(error) 1690 1691 def expression( 1692 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1693 ) -> E: 1694 """ 1695 Creates a new, validated Expression. 1696 1697 Args: 1698 exp_class: The expression class to instantiate. 1699 comments: An optional list of comments to attach to the expression. 1700 kwargs: The arguments to set for the expression along with their respective values. 1701 1702 Returns: 1703 The target expression. 1704 """ 1705 instance = exp_class(**kwargs) 1706 instance.add_comments(comments) if comments else self._add_comments(instance) 1707 return self.validate_expression(instance) 1708 1709 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1710 if expression and self._prev_comments: 1711 expression.add_comments(self._prev_comments) 1712 self._prev_comments = None 1713 1714 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1715 """ 1716 Validates an Expression, making sure that all its mandatory arguments are set. 1717 1718 Args: 1719 expression: The expression to validate. 1720 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1721 1722 Returns: 1723 The validated expression. 1724 """ 1725 if self.error_level != ErrorLevel.IGNORE: 1726 for error_message in expression.error_messages(args): 1727 self.raise_error(error_message) 1728 1729 return expression 1730 1731 def _find_sql(self, start: Token, end: Token) -> str: 1732 return self.sql[start.start : end.end + 1] 1733 1734 def _is_connected(self) -> bool: 1735 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1736 1737 def _advance(self, times: int = 1) -> None: 1738 self._index += times 1739 self._curr = seq_get(self._tokens, self._index) 1740 self._next = seq_get(self._tokens, self._index + 1) 1741 1742 if self._index > 0: 1743 self._prev = self._tokens[self._index - 1] 1744 self._prev_comments = self._prev.comments 1745 else: 1746 self._prev = None 1747 self._prev_comments = None 1748 1749 def _retreat(self, index: int) -> None: 1750 if index != self._index: 1751 self._advance(index - self._index) 1752 1753 def _warn_unsupported(self) -> None: 1754 if len(self._tokens) <= 1: 1755 return 1756 1757 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1758 # interested in emitting a warning for the one being currently processed. 1759 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1760 1761 logger.warning( 1762 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1763 ) 1764 1765 def _parse_command(self) -> exp.Command: 1766 self._warn_unsupported() 1767 return self.expression( 1768 exp.Command, 1769 comments=self._prev_comments, 1770 this=self._prev.text.upper(), 1771 expression=self._parse_string(), 1772 ) 1773 1774 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1775 """ 1776 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
1777 This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to 1778 solve this by setting & resetting the parser state accordingly. 1779 """ 1780 index = self._index 1781 error_level = self.error_level 1782 1783 self.error_level = ErrorLevel.IMMEDIATE 1784 try: 1785 this = parse_method() 1786 except ParseError: 1787 this = None 1788 finally: 1789 if not this or retreat: 1790 self._retreat(index) 1791 self.error_level = error_level 1792 1793 return this 1794 1795 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1796 start = self._prev 1797 exists = self._parse_exists() if allow_exists else None 1798 1799 self._match(TokenType.ON) 1800 1801 materialized = self._match_text_seq("MATERIALIZED") 1802 kind = self._match_set(self.CREATABLES) and self._prev 1803 if not kind: 1804 return self._parse_as_command(start) 1805 1806 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1807 this = self._parse_user_defined_function(kind=kind.token_type) 1808 elif kind.token_type == TokenType.TABLE: 1809 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1810 elif kind.token_type == TokenType.COLUMN: 1811 this = self._parse_column() 1812 else: 1813 this = self._parse_id_var() 1814 1815 self._match(TokenType.IS) 1816 1817 return self.expression( 1818 exp.Comment, 1819 this=this, 1820 kind=kind.text, 1821 expression=self._parse_string(), 1822 exists=exists, 1823 materialized=materialized, 1824 ) 1825 1826 def _parse_to_table( 1827 self, 1828 ) -> exp.ToTableProperty: 1829 table = self._parse_table_parts(schema=True) 1830 return self.expression(exp.ToTableProperty, this=table) 1831 1832 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1833 def _parse_ttl(self) -> exp.Expression: 1834 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1835 this = self._parse_bitwise() 1836 1837 if self._match_text_seq("DELETE"): 1838 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1839 if self._match_text_seq("RECOMPRESS"): 1840 return self.expression( 1841 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1842 ) 1843 if self._match_text_seq("TO", "DISK"): 1844 return self.expression( 1845 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1846 ) 1847 if self._match_text_seq("TO", "VOLUME"): 1848 return self.expression( 1849 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1850 ) 1851 1852 return this 1853 1854 expressions = self._parse_csv(_parse_ttl_action) 1855 where = self._parse_where() 1856 group = self._parse_group() 1857 1858 aggregates = None 1859 if group and self._match(TokenType.SET): 1860 aggregates = self._parse_csv(self._parse_set_item) 1861 1862 return self.expression( 1863 exp.MergeTreeTTL, 1864 expressions=expressions, 1865 where=where, 1866 group=group, 1867 aggregates=aggregates, 1868 ) 1869 1870 def _parse_statement(self) -> t.Optional[exp.Expression]: 1871 if self._curr is None: 1872 return None 1873 1874 if self._match_set(self.STATEMENT_PARSERS): 1875 comments = self._prev_comments 1876 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 1877 stmt.add_comments(comments, prepend=True) 1878 return stmt 1879 1880 if self._match_set(self.dialect.tokenizer.COMMANDS): 1881 return self._parse_command() 1882 1883 expression = self._parse_expression() 1884 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1885 return self._parse_query_modifiers(expression)
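# Usage sketch for the entry points above (illustrative; it assumes only names
# already visible in this module, i.e. Tokenizer from sqlglot.tokens and the
# public parse()/parse_into() methods; the SQL strings are made-up examples):
#
#     from sqlglot import exp
#     from sqlglot.tokens import Tokenizer
#
#     sql = "SELECT 1; SELECT 2"
#     parser = Parser()
#
#     # parse() splits the token stream on semicolons and runs _parse_statement
#     # once per chunk, so this yields two exp.Select nodes
#     statements = parser.parse(Tokenizer().tokenize(sql), sql=sql)
#
#     # parse_into() targets explicit expression types instead; if none of them
#     # succeeds, it raises a ParseError that merges the per-type errors
#     select = parser.parse_into(exp.Select, Tokenizer().tokenize("SELECT 1"), "SELECT 1")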
1886 1887 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1888 start = self._prev 1889 temporary = self._match(TokenType.TEMPORARY) 1890 materialized = self._match_text_seq("MATERIALIZED") 1891 1892 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1893 if not kind: 1894 return self._parse_as_command(start) 1895 1896 concurrently = self._match_text_seq("CONCURRENTLY") 1897 if_exists = exists or self._parse_exists() 1898 1899 if kind == "COLUMN": 1900 this = self._parse_column() 1901 else: 1902 this = self._parse_table_parts( 1903 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1904 ) 1905 1906 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1907 1908 if self._match(TokenType.L_PAREN, advance=False): 1909 expressions = self._parse_wrapped_csv(self._parse_types) 1910 else: 1911 expressions = None 1912 1913 return self.expression( 1914 exp.Drop, 1915 exists=if_exists, 1916 this=this, 1917 expressions=expressions, 1918 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1919 temporary=temporary, 1920 materialized=materialized, 1921 cascade=self._match_text_seq("CASCADE"), 1922 constraints=self._match_text_seq("CONSTRAINTS"), 1923 purge=self._match_text_seq("PURGE"), 1924 cluster=cluster, 1925 concurrently=concurrently, 1926 ) 1927 1928 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1929 return ( 1930 self._match_text_seq("IF") 1931 and (not not_ or self._match(TokenType.NOT)) 1932 and self._match(TokenType.EXISTS) 1933 ) 1934 1935 def _parse_create(self) -> exp.Create | exp.Command: 1936 # Note: this can't be None because we've matched a statement parser 1937 start = self._prev 1938 1939 replace = ( 1940 start.token_type == TokenType.REPLACE 1941 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1942 or self._match_pair(TokenType.OR, TokenType.ALTER) 1943 ) 1944 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1945 1946 unique = self._match(TokenType.UNIQUE) 1947 1948 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1949 clustered = True 1950 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1951 "COLUMNSTORE" 1952 ): 1953 clustered = False 1954 else: 1955 clustered = None 1956 1957 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1958 self._advance() 1959 1960 properties = None 1961 create_token = self._match_set(self.CREATABLES) and self._prev 1962 1963 if not create_token: 1964 # exp.Properties.Location.POST_CREATE 1965 properties = self._parse_properties() 1966 create_token = self._match_set(self.CREATABLES) and self._prev 1967 1968 if not properties or not create_token: 1969 return self._parse_as_command(start) 1970 1971 concurrently = self._match_text_seq("CONCURRENTLY") 1972 exists = self._parse_exists(not_=True) 1973 this = None 1974 expression: t.Optional[exp.Expression] = None 1975 indexes = None 1976 no_schema_binding = None 1977 begin = None 1978 end = None 1979 clone = None 1980 1981 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1982 nonlocal properties 1983 if properties and temp_props: 1984 properties.expressions.extend(temp_props.expressions) 1985 elif temp_props: 1986 properties = temp_props 1987 1988 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1989 this = self._parse_user_defined_function(kind=create_token.token_type) 1990 1991 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1992 
extend_props(self._parse_properties()) 1993 1994 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1995 extend_props(self._parse_properties()) 1996 1997 if not expression: 1998 if self._match(TokenType.COMMAND): 1999 expression = self._parse_as_command(self._prev) 2000 else: 2001 begin = self._match(TokenType.BEGIN) 2002 return_ = self._match_text_seq("RETURN") 2003 2004 if self._match(TokenType.STRING, advance=False): 2005 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 2006 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 2007 expression = self._parse_string() 2008 extend_props(self._parse_properties()) 2009 else: 2010 expression = self._parse_user_defined_function_expression() 2011 2012 end = self._match_text_seq("END") 2013 2014 if return_: 2015 expression = self.expression(exp.Return, this=expression) 2016 elif create_token.token_type == TokenType.INDEX: 2017 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c) 2018 if not self._match(TokenType.ON): 2019 index = self._parse_id_var() 2020 anonymous = False 2021 else: 2022 index = None 2023 anonymous = True 2024 2025 this = self._parse_index(index=index, anonymous=anonymous) 2026 elif create_token.token_type in self.DB_CREATABLES: 2027 table_parts = self._parse_table_parts( 2028 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 2029 ) 2030 2031 # exp.Properties.Location.POST_NAME 2032 self._match(TokenType.COMMA) 2033 extend_props(self._parse_properties(before=True)) 2034 2035 this = self._parse_schema(this=table_parts) 2036 2037 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2038 extend_props(self._parse_properties()) 2039 2040 has_alias = self._match(TokenType.ALIAS) 2041 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2042 # exp.Properties.Location.POST_ALIAS 2043 extend_props(self._parse_properties()) 2044 2045 if create_token.token_type == TokenType.SEQUENCE: 2046 expression = self._parse_types() 2047 extend_props(self._parse_properties()) 2048 else: 2049 expression = self._parse_ddl_select() 2050 2051 # Some dialects also support using a table as an alias instead of a SELECT. 2052 # Here we fall back to this as an alternative.
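# For instance (an assumed, ClickHouse-style example): in CREATE TABLE t2 AS t1,
# t1 is an existing table rather than a SELECT, so it is picked up by the
# _try_parse(self._parse_table_parts) fallback below.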
2053 if not expression and has_alias: 2054 expression = self._try_parse(self._parse_table_parts) 2055 2056 if create_token.token_type == TokenType.TABLE: 2057 # exp.Properties.Location.POST_EXPRESSION 2058 extend_props(self._parse_properties()) 2059 2060 indexes = [] 2061 while True: 2062 index = self._parse_index() 2063 2064 # exp.Properties.Location.POST_INDEX 2065 extend_props(self._parse_properties()) 2066 if not index: 2067 break 2068 else: 2069 self._match(TokenType.COMMA) 2070 indexes.append(index) 2071 elif create_token.token_type == TokenType.VIEW: 2072 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2073 no_schema_binding = True 2074 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2075 extend_props(self._parse_properties()) 2076 2077 shallow = self._match_text_seq("SHALLOW") 2078 2079 if self._match_texts(self.CLONE_KEYWORDS): 2080 copy = self._prev.text.lower() == "copy" 2081 clone = self.expression( 2082 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2083 ) 2084 2085 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2086 return self._parse_as_command(start) 2087 2088 create_kind_text = create_token.text.upper() 2089 return self.expression( 2090 exp.Create, 2091 this=this, 2092 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2093 replace=replace, 2094 refresh=refresh, 2095 unique=unique, 2096 expression=expression, 2097 exists=exists, 2098 properties=properties, 2099 indexes=indexes, 2100 no_schema_binding=no_schema_binding, 2101 begin=begin, 2102 end=end, 2103 clone=clone, 2104 concurrently=concurrently, 2105 clustered=clustered, 2106 ) 2107 2108 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2109 seq = exp.SequenceProperties() 2110 2111 options = [] 2112 index = self._index 2113 2114 while self._curr: 2115 self._match(TokenType.COMMA) 2116 if self._match_text_seq("INCREMENT"): 2117 self._match_text_seq("BY") 2118 self._match_text_seq("=") 2119 seq.set("increment", self._parse_term()) 2120 elif self._match_text_seq("MINVALUE"): 2121 seq.set("minvalue", self._parse_term()) 2122 elif self._match_text_seq("MAXVALUE"): 2123 seq.set("maxvalue", self._parse_term()) 2124 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2125 self._match_text_seq("=") 2126 seq.set("start", self._parse_term()) 2127 elif self._match_text_seq("CACHE"): 2128 # T-SQL allows empty CACHE which is initialized dynamically 2129 seq.set("cache", self._parse_number() or True) 2130 elif self._match_text_seq("OWNED", "BY"): 2131 # "OWNED BY NONE" is the default 2132 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2133 else: 2134 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2135 if opt: 2136 options.append(opt) 2137 else: 2138 break 2139 2140 seq.set("options", options if options else None) 2141 return None if self._index == index else seq 2142 2143 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2144 # only used for teradata currently 2145 self._match(TokenType.COMMA) 2146 2147 kwargs = { 2148 "no": self._match_text_seq("NO"), 2149 "dual": self._match_text_seq("DUAL"), 2150 "before": self._match_text_seq("BEFORE"), 2151 "default": self._match_text_seq("DEFAULT"), 2152 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2153 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2154 "after": self._match_text_seq("AFTER"), 2155 "minimum": 
self._match_texts(("MIN", "MINIMUM")), 2156 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2157 } 2158 2159 if self._match_texts(self.PROPERTY_PARSERS): 2160 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2161 try: 2162 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2163 except TypeError: 2164 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2165 2166 return None 2167 2168 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2169 return self._parse_wrapped_csv(self._parse_property) 2170 2171 def _parse_property(self) -> t.Optional[exp.Expression]: 2172 if self._match_texts(self.PROPERTY_PARSERS): 2173 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2174 2175 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2176 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2177 2178 if self._match_text_seq("COMPOUND", "SORTKEY"): 2179 return self._parse_sortkey(compound=True) 2180 2181 if self._match_text_seq("SQL", "SECURITY"): 2182 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2183 2184 index = self._index 2185 key = self._parse_column() 2186 2187 if not self._match(TokenType.EQ): 2188 self._retreat(index) 2189 return self._parse_sequence_properties() 2190 2191 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2192 if isinstance(key, exp.Column): 2193 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2194 2195 value = self._parse_bitwise() or self._parse_var(any_token=True) 2196 2197 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2198 if isinstance(value, exp.Column): 2199 value = exp.var(value.name) 2200 2201 return self.expression(exp.Property, this=key, value=value) 2202 2203 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2204 if self._match_text_seq("BY"): 2205 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2206 2207 self._match(TokenType.ALIAS) 2208 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2209 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2210 2211 return self.expression( 2212 exp.FileFormatProperty, 2213 this=( 2214 self.expression( 2215 exp.InputOutputFormat, 2216 input_format=input_format, 2217 output_format=output_format, 2218 ) 2219 if input_format or output_format 2220 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2221 ), 2222 ) 2223 2224 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2225 field = self._parse_field() 2226 if isinstance(field, exp.Identifier) and not field.quoted: 2227 field = exp.var(field) 2228 2229 return field 2230 2231 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2232 self._match(TokenType.EQ) 2233 self._match(TokenType.ALIAS) 2234 2235 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2236 2237 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2238 properties = [] 2239 while True: 2240 if before: 2241 prop = self._parse_property_before() 2242 else: 2243 prop = self._parse_property() 2244 if not prop: 2245 break 2246 for p in ensure_list(prop): 2247 properties.append(p) 2248 2249 if properties: 2250 return self.expression(exp.Properties, expressions=properties) 2251 2252 return None 2253 2254 
def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2255 return self.expression( 2256 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2257 ) 2258 2259 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2260 if self._match_texts(("NONE", "DEFINER", "INVOKER")): 2261 security_specifier = self._prev.text.upper() 2262 return self.expression(exp.SecurityProperty, this=security_specifier) 2263 return None 2264 2265 def _parse_settings_property(self) -> exp.SettingsProperty: 2266 return self.expression( 2267 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2268 ) 2269 2270 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2271 if self._index >= 2: 2272 pre_volatile_token = self._tokens[self._index - 2] 2273 else: 2274 pre_volatile_token = None 2275 2276 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2277 return exp.VolatileProperty() 2278 2279 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2280 2281 def _parse_retention_period(self) -> exp.Var: 2282 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2283 number = self._parse_number() 2284 number_str = f"{number} " if number else "" 2285 unit = self._parse_var(any_token=True) 2286 return exp.var(f"{number_str}{unit}") 2287 2288 def _parse_system_versioning_property( 2289 self, with_: bool = False 2290 ) -> exp.WithSystemVersioningProperty: 2291 self._match(TokenType.EQ) 2292 prop = self.expression( 2293 exp.WithSystemVersioningProperty, 2294 **{ # type: ignore 2295 "on": True, 2296 "with": with_, 2297 }, 2298 ) 2299 2300 if self._match_text_seq("OFF"): 2301 prop.set("on", False) 2302 return prop 2303 2304 self._match(TokenType.ON) 2305 if self._match(TokenType.L_PAREN): 2306 while self._curr and not self._match(TokenType.R_PAREN): 2307 if self._match_text_seq("HISTORY_TABLE", "="): 2308 prop.set("this", self._parse_table_parts()) 2309 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2310 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2311 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2312 prop.set("retention_period", self._parse_retention_period()) 2313 2314 self._match(TokenType.COMMA) 2315 2316 return prop 2317 2318 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2319 self._match(TokenType.EQ) 2320 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2321 prop = self.expression(exp.DataDeletionProperty, on=on) 2322 2323 if self._match(TokenType.L_PAREN): 2324 while self._curr and not self._match(TokenType.R_PAREN): 2325 if self._match_text_seq("FILTER_COLUMN", "="): 2326 prop.set("filter_column", self._parse_column()) 2327 elif self._match_text_seq("RETENTION_PERIOD", "="): 2328 prop.set("retention_period", self._parse_retention_period()) 2329 2330 self._match(TokenType.COMMA) 2331 2332 return prop 2333 2334 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2335 kind = "HASH" 2336 expressions: t.Optional[t.List[exp.Expression]] = None 2337 if self._match_text_seq("BY", "HASH"): 2338 expressions = self._parse_wrapped_csv(self._parse_id_var) 2339 elif self._match_text_seq("BY", "RANDOM"): 2340 kind = "RANDOM" 2341 2342 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2343 buckets: t.Optional[exp.Expression] = None 2344 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2345 
buckets = self._parse_number() 2346 2347 return self.expression( 2348 exp.DistributedByProperty, 2349 expressions=expressions, 2350 kind=kind, 2351 buckets=buckets, 2352 order=self._parse_order(), 2353 ) 2354 2355 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2356 self._match_text_seq("KEY") 2357 expressions = self._parse_wrapped_id_vars() 2358 return self.expression(expr_type, expressions=expressions) 2359 2360 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2361 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2362 prop = self._parse_system_versioning_property(with_=True) 2363 self._match_r_paren() 2364 return prop 2365 2366 if self._match(TokenType.L_PAREN, advance=False): 2367 return self._parse_wrapped_properties() 2368 2369 if self._match_text_seq("JOURNAL"): 2370 return self._parse_withjournaltable() 2371 2372 if self._match_texts(self.VIEW_ATTRIBUTES): 2373 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2374 2375 if self._match_text_seq("DATA"): 2376 return self._parse_withdata(no=False) 2377 elif self._match_text_seq("NO", "DATA"): 2378 return self._parse_withdata(no=True) 2379 2380 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2381 return self._parse_serde_properties(with_=True) 2382 2383 if self._match(TokenType.SCHEMA): 2384 return self.expression( 2385 exp.WithSchemaBindingProperty, 2386 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2387 ) 2388 2389 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2390 return self.expression( 2391 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2392 ) 2393 2394 if not self._next: 2395 return None 2396 2397 return self._parse_withisolatedloading() 2398 2399 def _parse_procedure_option(self) -> exp.Expression | None: 2400 if self._match_text_seq("EXECUTE", "AS"): 2401 return self.expression( 2402 exp.ExecuteAsProperty, 2403 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2404 or self._parse_string(), 2405 ) 2406 2407 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2408 2409 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2410 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2411 self._match(TokenType.EQ) 2412 2413 user = self._parse_id_var() 2414 self._match(TokenType.PARAMETER) 2415 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2416 2417 if not user or not host: 2418 return None 2419 2420 return exp.DefinerProperty(this=f"{user}@{host}") 2421 2422 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2423 self._match(TokenType.TABLE) 2424 self._match(TokenType.EQ) 2425 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2426 2427 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2428 return self.expression(exp.LogProperty, no=no) 2429 2430 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2431 return self.expression(exp.JournalProperty, **kwargs) 2432 2433 def _parse_checksum(self) -> exp.ChecksumProperty: 2434 self._match(TokenType.EQ) 2435 2436 on = None 2437 if self._match(TokenType.ON): 2438 on = True 2439 elif self._match_text_seq("OFF"): 2440 on = False 2441 2442 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2443 2444 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2445 return self.expression( 2446 exp.Cluster, 2447 expressions=( 2448 
self._parse_wrapped_csv(self._parse_ordered) 2449 if wrapped 2450 else self._parse_csv(self._parse_ordered) 2451 ), 2452 ) 2453 2454 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2455 self._match_text_seq("BY") 2456 2457 self._match_l_paren() 2458 expressions = self._parse_csv(self._parse_column) 2459 self._match_r_paren() 2460 2461 if self._match_text_seq("SORTED", "BY"): 2462 self._match_l_paren() 2463 sorted_by = self._parse_csv(self._parse_ordered) 2464 self._match_r_paren() 2465 else: 2466 sorted_by = None 2467 2468 self._match(TokenType.INTO) 2469 buckets = self._parse_number() 2470 self._match_text_seq("BUCKETS") 2471 2472 return self.expression( 2473 exp.ClusteredByProperty, 2474 expressions=expressions, 2475 sorted_by=sorted_by, 2476 buckets=buckets, 2477 ) 2478 2479 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2480 if not self._match_text_seq("GRANTS"): 2481 self._retreat(self._index - 1) 2482 return None 2483 2484 return self.expression(exp.CopyGrantsProperty) 2485 2486 def _parse_freespace(self) -> exp.FreespaceProperty: 2487 self._match(TokenType.EQ) 2488 return self.expression( 2489 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2490 ) 2491 2492 def _parse_mergeblockratio( 2493 self, no: bool = False, default: bool = False 2494 ) -> exp.MergeBlockRatioProperty: 2495 if self._match(TokenType.EQ): 2496 return self.expression( 2497 exp.MergeBlockRatioProperty, 2498 this=self._parse_number(), 2499 percent=self._match(TokenType.PERCENT), 2500 ) 2501 2502 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2503 2504 def _parse_datablocksize( 2505 self, 2506 default: t.Optional[bool] = None, 2507 minimum: t.Optional[bool] = None, 2508 maximum: t.Optional[bool] = None, 2509 ) -> exp.DataBlocksizeProperty: 2510 self._match(TokenType.EQ) 2511 size = self._parse_number() 2512 2513 units = None 2514 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2515 units = self._prev.text 2516 2517 return self.expression( 2518 exp.DataBlocksizeProperty, 2519 size=size, 2520 units=units, 2521 default=default, 2522 minimum=minimum, 2523 maximum=maximum, 2524 ) 2525 2526 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2527 self._match(TokenType.EQ) 2528 always = self._match_text_seq("ALWAYS") 2529 manual = self._match_text_seq("MANUAL") 2530 never = self._match_text_seq("NEVER") 2531 default = self._match_text_seq("DEFAULT") 2532 2533 autotemp = None 2534 if self._match_text_seq("AUTOTEMP"): 2535 autotemp = self._parse_schema() 2536 2537 return self.expression( 2538 exp.BlockCompressionProperty, 2539 always=always, 2540 manual=manual, 2541 never=never, 2542 default=default, 2543 autotemp=autotemp, 2544 ) 2545 2546 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2547 index = self._index 2548 no = self._match_text_seq("NO") 2549 concurrent = self._match_text_seq("CONCURRENT") 2550 2551 if not self._match_text_seq("ISOLATED", "LOADING"): 2552 self._retreat(index) 2553 return None 2554 2555 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2556 return self.expression( 2557 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2558 ) 2559 2560 def _parse_locking(self) -> exp.LockingProperty: 2561 if self._match(TokenType.TABLE): 2562 kind = "TABLE" 2563 elif self._match(TokenType.VIEW): 2564 kind = "VIEW" 2565 elif self._match(TokenType.ROW): 2566 kind = "ROW" 2567 elif 
self._match_text_seq("DATABASE"): 2568 kind = "DATABASE" 2569 else: 2570 kind = None 2571 2572 if kind in ("DATABASE", "TABLE", "VIEW"): 2573 this = self._parse_table_parts() 2574 else: 2575 this = None 2576 2577 if self._match(TokenType.FOR): 2578 for_or_in = "FOR" 2579 elif self._match(TokenType.IN): 2580 for_or_in = "IN" 2581 else: 2582 for_or_in = None 2583 2584 if self._match_text_seq("ACCESS"): 2585 lock_type = "ACCESS" 2586 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2587 lock_type = "EXCLUSIVE" 2588 elif self._match_text_seq("SHARE"): 2589 lock_type = "SHARE" 2590 elif self._match_text_seq("READ"): 2591 lock_type = "READ" 2592 elif self._match_text_seq("WRITE"): 2593 lock_type = "WRITE" 2594 elif self._match_text_seq("CHECKSUM"): 2595 lock_type = "CHECKSUM" 2596 else: 2597 lock_type = None 2598 2599 override = self._match_text_seq("OVERRIDE") 2600 2601 return self.expression( 2602 exp.LockingProperty, 2603 this=this, 2604 kind=kind, 2605 for_or_in=for_or_in, 2606 lock_type=lock_type, 2607 override=override, 2608 ) 2609 2610 def _parse_partition_by(self) -> t.List[exp.Expression]: 2611 if self._match(TokenType.PARTITION_BY): 2612 return self._parse_csv(self._parse_assignment) 2613 return [] 2614 2615 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2616 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2617 if self._match_text_seq("MINVALUE"): 2618 return exp.var("MINVALUE") 2619 if self._match_text_seq("MAXVALUE"): 2620 return exp.var("MAXVALUE") 2621 return self._parse_bitwise() 2622 2623 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2624 expression = None 2625 from_expressions = None 2626 to_expressions = None 2627 2628 if self._match(TokenType.IN): 2629 this = self._parse_wrapped_csv(self._parse_bitwise) 2630 elif self._match(TokenType.FROM): 2631 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2632 self._match_text_seq("TO") 2633 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2634 elif self._match_text_seq("WITH", "(", "MODULUS"): 2635 this = self._parse_number() 2636 self._match_text_seq(",", "REMAINDER") 2637 expression = self._parse_number() 2638 self._match_r_paren() 2639 else: 2640 self.raise_error("Failed to parse partition bound spec.") 2641 2642 return self.expression( 2643 exp.PartitionBoundSpec, 2644 this=this, 2645 expression=expression, 2646 from_expressions=from_expressions, 2647 to_expressions=to_expressions, 2648 ) 2649 2650 # https://www.postgresql.org/docs/current/sql-createtable.html 2651 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2652 if not self._match_text_seq("OF"): 2653 self._retreat(self._index - 1) 2654 return None 2655 2656 this = self._parse_table(schema=True) 2657 2658 if self._match(TokenType.DEFAULT): 2659 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2660 elif self._match_text_seq("FOR", "VALUES"): 2661 expression = self._parse_partition_bound_spec() 2662 else: 2663 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2664 2665 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2666 2667 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2668 self._match(TokenType.EQ) 2669 return self.expression( 2670 exp.PartitionedByProperty, 2671 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2672 ) 2673 2674 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2675 if self._match_text_seq("AND", "STATISTICS"): 2676 
statistics = True 2677 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2678 statistics = False 2679 else: 2680 statistics = None 2681 2682 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2683 2684 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2685 if self._match_text_seq("SQL"): 2686 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2687 return None 2688 2689 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2690 if self._match_text_seq("SQL", "DATA"): 2691 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2692 return None 2693 2694 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2695 if self._match_text_seq("PRIMARY", "INDEX"): 2696 return exp.NoPrimaryIndexProperty() 2697 if self._match_text_seq("SQL"): 2698 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2699 return None 2700 2701 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2702 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2703 return exp.OnCommitProperty() 2704 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2705 return exp.OnCommitProperty(delete=True) 2706 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2707 2708 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2709 if self._match_text_seq("SQL", "DATA"): 2710 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2711 return None 2712 2713 def _parse_distkey(self) -> exp.DistKeyProperty: 2714 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2715 2716 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2717 table = self._parse_table(schema=True) 2718 2719 options = [] 2720 while self._match_texts(("INCLUDING", "EXCLUDING")): 2721 this = self._prev.text.upper() 2722 2723 id_var = self._parse_id_var() 2724 if not id_var: 2725 return None 2726 2727 options.append( 2728 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2729 ) 2730 2731 return self.expression(exp.LikeProperty, this=table, expressions=options) 2732 2733 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2734 return self.expression( 2735 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2736 ) 2737 2738 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2739 self._match(TokenType.EQ) 2740 return self.expression( 2741 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2742 ) 2743 2744 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2745 self._match_text_seq("WITH", "CONNECTION") 2746 return self.expression( 2747 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2748 ) 2749 2750 def _parse_returns(self) -> exp.ReturnsProperty: 2751 value: t.Optional[exp.Expression] 2752 null = None 2753 is_table = self._match(TokenType.TABLE) 2754 2755 if is_table: 2756 if self._match(TokenType.LT): 2757 value = self.expression( 2758 exp.Schema, 2759 this="TABLE", 2760 expressions=self._parse_csv(self._parse_struct_types), 2761 ) 2762 if not self._match(TokenType.GT): 2763 self.raise_error("Expecting >") 2764 else: 2765 value = self._parse_schema(exp.var("TABLE")) 2766 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2767 null = True 2768 value = None 2769 else: 2770 value = self._parse_types() 2771 2772 return 
self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2773 2774 def _parse_describe(self) -> exp.Describe: 2775 kind = self._match_set(self.CREATABLES) and self._prev.text 2776 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2777 if self._match(TokenType.DOT): 2778 style = None 2779 self._retreat(self._index - 2) 2780 2781 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2782 2783 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2784 this = self._parse_statement() 2785 else: 2786 this = self._parse_table(schema=True) 2787 2788 properties = self._parse_properties() 2789 expressions = properties.expressions if properties else None 2790 partition = self._parse_partition() 2791 return self.expression( 2792 exp.Describe, 2793 this=this, 2794 style=style, 2795 kind=kind, 2796 expressions=expressions, 2797 partition=partition, 2798 format=format, 2799 ) 2800 2801 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2802 kind = self._prev.text.upper() 2803 expressions = [] 2804 2805 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2806 if self._match(TokenType.WHEN): 2807 expression = self._parse_disjunction() 2808 self._match(TokenType.THEN) 2809 else: 2810 expression = None 2811 2812 else_ = self._match(TokenType.ELSE) 2813 2814 if not self._match(TokenType.INTO): 2815 return None 2816 2817 return self.expression( 2818 exp.ConditionalInsert, 2819 this=self.expression( 2820 exp.Insert, 2821 this=self._parse_table(schema=True), 2822 expression=self._parse_derived_table_values(), 2823 ), 2824 expression=expression, 2825 else_=else_, 2826 ) 2827 2828 expression = parse_conditional_insert() 2829 while expression is not None: 2830 expressions.append(expression) 2831 expression = parse_conditional_insert() 2832 2833 return self.expression( 2834 exp.MultitableInserts, 2835 kind=kind, 2836 comments=comments, 2837 expressions=expressions, 2838 source=self._parse_table(), 2839 ) 2840 2841 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2842 comments = [] 2843 hint = self._parse_hint() 2844 overwrite = self._match(TokenType.OVERWRITE) 2845 ignore = self._match(TokenType.IGNORE) 2846 local = self._match_text_seq("LOCAL") 2847 alternative = None 2848 is_function = None 2849 2850 if self._match_text_seq("DIRECTORY"): 2851 this: t.Optional[exp.Expression] = self.expression( 2852 exp.Directory, 2853 this=self._parse_var_or_string(), 2854 local=local, 2855 row_format=self._parse_row_format(match_row=True), 2856 ) 2857 else: 2858 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2859 comments += ensure_list(self._prev_comments) 2860 return self._parse_multitable_inserts(comments) 2861 2862 if self._match(TokenType.OR): 2863 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2864 2865 self._match(TokenType.INTO) 2866 comments += ensure_list(self._prev_comments) 2867 self._match(TokenType.TABLE) 2868 is_function = self._match(TokenType.FUNCTION) 2869 2870 this = ( 2871 self._parse_table(schema=True, parse_partition=True) 2872 if not is_function 2873 else self._parse_function() 2874 ) 2875 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2876 this.set("alias", self._parse_table_alias()) 2877 2878 returning = self._parse_returning() 2879 2880 return self.expression( 2881 exp.Insert, 2882 comments=comments, 2883 hint=hint, 2884 is_function=is_function, 2885 this=this, 
2886 stored=self._match_text_seq("STORED") and self._parse_stored(), 2887 by_name=self._match_text_seq("BY", "NAME"), 2888 exists=self._parse_exists(), 2889 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2890 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2891 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2892 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2893 conflict=self._parse_on_conflict(), 2894 returning=returning or self._parse_returning(), 2895 overwrite=overwrite, 2896 alternative=alternative, 2897 ignore=ignore, 2898 source=self._match(TokenType.TABLE) and self._parse_table(), 2899 ) 2900 2901 def _parse_kill(self) -> exp.Kill: 2902 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2903 2904 return self.expression( 2905 exp.Kill, 2906 this=self._parse_primary(), 2907 kind=kind, 2908 ) 2909 2910 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2911 conflict = self._match_text_seq("ON", "CONFLICT") 2912 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2913 2914 if not conflict and not duplicate: 2915 return None 2916 2917 conflict_keys = None 2918 constraint = None 2919 2920 if conflict: 2921 if self._match_text_seq("ON", "CONSTRAINT"): 2922 constraint = self._parse_id_var() 2923 elif self._match(TokenType.L_PAREN): 2924 conflict_keys = self._parse_csv(self._parse_id_var) 2925 self._match_r_paren() 2926 2927 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2928 if self._prev.token_type == TokenType.UPDATE: 2929 self._match(TokenType.SET) 2930 expressions = self._parse_csv(self._parse_equality) 2931 else: 2932 expressions = None 2933 2934 return self.expression( 2935 exp.OnConflict, 2936 duplicate=duplicate, 2937 expressions=expressions, 2938 action=action, 2939 conflict_keys=conflict_keys, 2940 constraint=constraint, 2941 where=self._parse_where(), 2942 ) 2943 2944 def _parse_returning(self) -> t.Optional[exp.Returning]: 2945 if not self._match(TokenType.RETURNING): 2946 return None 2947 return self.expression( 2948 exp.Returning, 2949 expressions=self._parse_csv(self._parse_expression), 2950 into=self._match(TokenType.INTO) and self._parse_table_part(), 2951 ) 2952 2953 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2954 if not self._match(TokenType.FORMAT): 2955 return None 2956 return self._parse_row_format() 2957 2958 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2959 index = self._index 2960 with_ = with_ or self._match_text_seq("WITH") 2961 2962 if not self._match(TokenType.SERDE_PROPERTIES): 2963 self._retreat(index) 2964 return None 2965 return self.expression( 2966 exp.SerdeProperties, 2967 **{ # type: ignore 2968 "expressions": self._parse_wrapped_properties(), 2969 "with": with_, 2970 }, 2971 ) 2972 2973 def _parse_row_format( 2974 self, match_row: bool = False 2975 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2976 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2977 return None 2978 2979 if self._match_text_seq("SERDE"): 2980 this = self._parse_string() 2981 2982 serde_properties = self._parse_serde_properties() 2983 2984 return self.expression( 2985 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2986 ) 2987 2988 self._match_text_seq("DELIMITED") 2989 2990 kwargs = {} 2991 2992 if 
self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2993 kwargs["fields"] = self._parse_string() 2994 if self._match_text_seq("ESCAPED", "BY"): 2995 kwargs["escaped"] = self._parse_string() 2996 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2997 kwargs["collection_items"] = self._parse_string() 2998 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2999 kwargs["map_keys"] = self._parse_string() 3000 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3001 kwargs["lines"] = self._parse_string() 3002 if self._match_text_seq("NULL", "DEFINED", "AS"): 3003 kwargs["null"] = self._parse_string() 3004 3005 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 3006 3007 def _parse_load(self) -> exp.LoadData | exp.Command: 3008 if self._match_text_seq("DATA"): 3009 local = self._match_text_seq("LOCAL") 3010 self._match_text_seq("INPATH") 3011 inpath = self._parse_string() 3012 overwrite = self._match(TokenType.OVERWRITE) 3013 self._match_pair(TokenType.INTO, TokenType.TABLE) 3014 3015 return self.expression( 3016 exp.LoadData, 3017 this=self._parse_table(schema=True), 3018 local=local, 3019 overwrite=overwrite, 3020 inpath=inpath, 3021 partition=self._parse_partition(), 3022 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3023 serde=self._match_text_seq("SERDE") and self._parse_string(), 3024 ) 3025 return self._parse_as_command(self._prev) 3026 3027 def _parse_delete(self) -> exp.Delete: 3028 # This handles MySQL's "Multiple-Table Syntax" 3029 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3030 tables = None 3031 if not self._match(TokenType.FROM, advance=False): 3032 tables = self._parse_csv(self._parse_table) or None 3033 3034 returning = self._parse_returning() 3035 3036 return self.expression( 3037 exp.Delete, 3038 tables=tables, 3039 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3040 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3041 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3042 where=self._parse_where(), 3043 returning=returning or self._parse_returning(), 3044 limit=self._parse_limit(), 3045 ) 3046 3047 def _parse_update(self) -> exp.Update: 3048 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 3049 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 3050 returning = self._parse_returning() 3051 return self.expression( 3052 exp.Update, 3053 **{ # type: ignore 3054 "this": this, 3055 "expressions": expressions, 3056 "from": self._parse_from(joins=True), 3057 "where": self._parse_where(), 3058 "returning": returning or self._parse_returning(), 3059 "order": self._parse_order(), 3060 "limit": self._parse_limit(), 3061 }, 3062 ) 3063 3064 def _parse_use(self) -> exp.Use: 3065 return self.expression( 3066 exp.Use, 3067 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3068 this=self._parse_table(schema=False), 3069 ) 3070 3071 def _parse_uncache(self) -> exp.Uncache: 3072 if not self._match(TokenType.TABLE): 3073 self.raise_error("Expecting TABLE after UNCACHE") 3074 3075 return self.expression( 3076 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3077 ) 3078 3079 def _parse_cache(self) -> exp.Cache: 3080 lazy = self._match_text_seq("LAZY") 3081 self._match(TokenType.TABLE) 3082 table = self._parse_table(schema=True) 3083 3084 options = [] 3085 if self._match_text_seq("OPTIONS"): 3086 self._match_l_paren() 3087 k = 
self._parse_string() 3088 self._match(TokenType.EQ) 3089 v = self._parse_string() 3090 options = [k, v] 3091 self._match_r_paren() 3092 3093 self._match(TokenType.ALIAS) 3094 return self.expression( 3095 exp.Cache, 3096 this=table, 3097 lazy=lazy, 3098 options=options, 3099 expression=self._parse_select(nested=True), 3100 ) 3101 3102 def _parse_partition(self) -> t.Optional[exp.Partition]: 3103 if not self._match_texts(self.PARTITION_KEYWORDS): 3104 return None 3105 3106 return self.expression( 3107 exp.Partition, 3108 subpartition=self._prev.text.upper() == "SUBPARTITION", 3109 expressions=self._parse_wrapped_csv(self._parse_assignment), 3110 ) 3111 3112 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3113 def _parse_value_expression() -> t.Optional[exp.Expression]: 3114 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3115 return exp.var(self._prev.text.upper()) 3116 return self._parse_expression() 3117 3118 if self._match(TokenType.L_PAREN): 3119 expressions = self._parse_csv(_parse_value_expression) 3120 self._match_r_paren() 3121 return self.expression(exp.Tuple, expressions=expressions) 3122 3123 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 3124 expression = self._parse_expression() 3125 if expression: 3126 return self.expression(exp.Tuple, expressions=[expression]) 3127 return None 3128 3129 def _parse_projections(self) -> t.List[exp.Expression]: 3130 return self._parse_expressions() 3131 3132 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3133 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3134 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3135 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3136 ) 3137 elif self._match(TokenType.FROM): 3138 from_ = self._parse_from(skip_from_token=True) 3139 # Support parentheses for duckdb FROM-first syntax 3140 select = self._parse_select() 3141 if select: 3142 select.set("from", from_) 3143 this = select 3144 else: 3145 this = exp.select("*").from_(t.cast(exp.From, from_)) 3146 else: 3147 this = ( 3148 self._parse_table() 3149 if table 3150 else self._parse_select(nested=True, parse_set_operation=False) 3151 ) 3152 3153 # Transform exp.Values into an exp.Table to pass through parse_query_modifiers 3154 # in case a modifier (e.g. 
join) follows 3155 if table and isinstance(this, exp.Values) and this.alias: 3156 alias = this.args["alias"].pop() 3157 this = exp.Table(this=this, alias=alias) 3158 3159 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3160 3161 return this 3162 3163 def _parse_select( 3164 self, 3165 nested: bool = False, 3166 table: bool = False, 3167 parse_subquery_alias: bool = True, 3168 parse_set_operation: bool = True, 3169 ) -> t.Optional[exp.Expression]: 3170 cte = self._parse_with() 3171 3172 if cte: 3173 this = self._parse_statement() 3174 3175 if not this: 3176 self.raise_error("Failed to parse any statement following CTE") 3177 return cte 3178 3179 if "with" in this.arg_types: 3180 this.set("with", cte) 3181 else: 3182 self.raise_error(f"{this.key} does not support CTE") 3183 this = cte 3184 3185 return this 3186 3187 # duckdb supports queries that lead with FROM x 3188 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 3189 3190 if self._match(TokenType.SELECT): 3191 comments = self._prev_comments 3192 3193 hint = self._parse_hint() 3194 3195 if self._next and not self._next.token_type == TokenType.DOT: 3196 all_ = self._match(TokenType.ALL) 3197 distinct = self._match_set(self.DISTINCT_TOKENS) 3198 else: 3199 all_, distinct = None, None 3200 3201 kind = ( 3202 self._match(TokenType.ALIAS) 3203 and self._match_texts(("STRUCT", "VALUE")) 3204 and self._prev.text.upper() 3205 ) 3206 3207 if distinct: 3208 distinct = self.expression( 3209 exp.Distinct, 3210 on=self._parse_value(values=False) if self._match(TokenType.ON) else None, 3211 ) 3212 3213 if all_ and distinct: 3214 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3215 3216 operation_modifiers = [] 3217 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3218 operation_modifiers.append(exp.var(self._prev.text.upper())) 3219 3220 limit = self._parse_limit(top=True) 3221 projections = self._parse_projections() 3222 3223 this = self.expression( 3224 exp.Select, 3225 kind=kind, 3226 hint=hint, 3227 distinct=distinct, 3228 expressions=projections, 3229 limit=limit, 3230 operation_modifiers=operation_modifiers or None, 3231 ) 3232 this.comments = comments 3233 3234 into = self._parse_into() 3235 if into: 3236 this.set("into", into) 3237 3238 if not from_: 3239 from_ = self._parse_from() 3240 3241 if from_: 3242 this.set("from", from_) 3243 3244 this = self._parse_query_modifiers(this) 3245 elif (table or nested) and self._match(TokenType.L_PAREN): 3246 this = self._parse_wrapped_select(table=table) 3247 3248 # We return early here so that the UNION isn't attached to the subquery by the 3249 # following call to _parse_set_operations, but instead becomes the parent node 3250 self._match_r_paren() 3251 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3252 elif self._match(TokenType.VALUES, advance=False): 3253 this = self._parse_derived_table_values() 3254 elif from_: 3255 if self._match(TokenType.PIPE_GT, advance=False): 3256 return self._parse_pipe_syntax_query( 3257 exp.Select().from_(from_.this, append=False, copy=False) 3258 ) 3259 this = exp.select("*").from_(from_.this, copy=False) 3260 elif self._match(TokenType.SUMMARIZE): 3261 table = self._match(TokenType.TABLE) 3262 this = self._parse_select() or self._parse_string() or self._parse_table() 3263 return self.expression(exp.Summarize, this=this, table=table) 3264 elif self._match(TokenType.DESCRIBE): 3265 this = self._parse_describe() 3266 elif self._match_text_seq("STREAM"): 3267 this = 
self._parse_function() 3268 if this: 3269 this = self.expression(exp.Stream, this=this) 3270 else: 3271 self._retreat(self._index - 1) 3272 else: 3273 this = None 3274 3275 return self._parse_set_operations(this) if parse_set_operation else this 3276 3277 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3278 self._match_text_seq("SEARCH") 3279 3280 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3281 3282 if not kind: 3283 return None 3284 3285 self._match_text_seq("FIRST", "BY") 3286 3287 return self.expression( 3288 exp.RecursiveWithSearch, 3289 kind=kind, 3290 this=self._parse_id_var(), 3291 expression=self._match_text_seq("SET") and self._parse_id_var(), 3292 using=self._match_text_seq("USING") and self._parse_id_var(), 3293 ) 3294 3295 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3296 if not skip_with_token and not self._match(TokenType.WITH): 3297 return None 3298 3299 comments = self._prev_comments 3300 recursive = self._match(TokenType.RECURSIVE) 3301 3302 last_comments = None 3303 expressions = [] 3304 while True: 3305 cte = self._parse_cte() 3306 if isinstance(cte, exp.CTE): 3307 expressions.append(cte) 3308 if last_comments: 3309 cte.add_comments(last_comments) 3310 3311 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3312 break 3313 else: 3314 self._match(TokenType.WITH) 3315 3316 last_comments = self._prev_comments 3317 3318 return self.expression( 3319 exp.With, 3320 comments=comments, 3321 expressions=expressions, 3322 recursive=recursive, 3323 search=self._parse_recursive_with_search(), 3324 ) 3325 3326 def _parse_cte(self) -> t.Optional[exp.CTE]: 3327 index = self._index 3328 3329 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3330 if not alias or not alias.this: 3331 self.raise_error("Expected CTE to have alias") 3332 3333 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3334 self._retreat(index) 3335 return None 3336 3337 comments = self._prev_comments 3338 3339 if self._match_text_seq("NOT", "MATERIALIZED"): 3340 materialized = False 3341 elif self._match_text_seq("MATERIALIZED"): 3342 materialized = True 3343 else: 3344 materialized = None 3345 3346 cte = self.expression( 3347 exp.CTE, 3348 this=self._parse_wrapped(self._parse_statement), 3349 alias=alias, 3350 materialized=materialized, 3351 comments=comments, 3352 ) 3353 3354 if isinstance(cte.this, exp.Values): 3355 cte.set("this", exp.select("*").from_(exp.alias_(cte.this, "_values", table=True))) 3356 3357 return cte 3358 3359 def _parse_table_alias( 3360 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3361 ) -> t.Optional[exp.TableAlias]: 3362 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3363 # so this section tries to parse the clause version and if it fails, it treats the token 3364 # as an identifier (alias) 3365 if self._can_parse_limit_or_offset(): 3366 return None 3367 3368 any_token = self._match(TokenType.ALIAS) 3369 alias = ( 3370 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3371 or self._parse_string_as_identifier() 3372 ) 3373 3374 index = self._index 3375 if self._match(TokenType.L_PAREN): 3376 columns = self._parse_csv(self._parse_function_parameter) 3377 self._match_r_paren() if columns else self._retreat(index) 3378 else: 3379 columns = None 3380 3381 if not alias and not columns: 3382 return None 3383 3384 table_alias = 
self.expression(exp.TableAlias, this=alias, columns=columns) 3385 3386 # We bubble up comments from the Identifier to the TableAlias 3387 if isinstance(alias, exp.Identifier): 3388 table_alias.add_comments(alias.pop_comments()) 3389 3390 return table_alias 3391 3392 def _parse_subquery( 3393 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3394 ) -> t.Optional[exp.Subquery]: 3395 if not this: 3396 return None 3397 3398 return self.expression( 3399 exp.Subquery, 3400 this=this, 3401 pivots=self._parse_pivots(), 3402 alias=self._parse_table_alias() if parse_alias else None, 3403 sample=self._parse_table_sample(), 3404 ) 3405 3406 def _implicit_unnests_to_explicit(self, this: E) -> E: 3407 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3408 3409 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3410 for i, join in enumerate(this.args.get("joins") or []): 3411 table = join.this 3412 normalized_table = table.copy() 3413 normalized_table.meta["maybe_column"] = True 3414 normalized_table = _norm(normalized_table, dialect=self.dialect) 3415 3416 if isinstance(table, exp.Table) and not join.args.get("on"): 3417 if normalized_table.parts[0].name in refs: 3418 table_as_column = table.to_column() 3419 unnest = exp.Unnest(expressions=[table_as_column]) 3420 3421 # Table.to_column creates a parent Alias node that we want to convert to 3422 # a TableAlias and attach to the Unnest, so it matches the parser's output 3423 if isinstance(table.args.get("alias"), exp.TableAlias): 3424 table_as_column.replace(table_as_column.this) 3425 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3426 3427 table.replace(unnest) 3428 3429 refs.add(normalized_table.alias_or_name) 3430 3431 return this 3432 3433 def _parse_query_modifiers( 3434 self, this: t.Optional[exp.Expression] 3435 ) -> t.Optional[exp.Expression]: 3436 if isinstance(this, self.MODIFIABLES): 3437 for join in self._parse_joins(): 3438 this.append("joins", join) 3439 for lateral in iter(self._parse_lateral, None): 3440 this.append("laterals", lateral) 3441 3442 while True: 3443 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3444 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3445 key, expression = parser(self) 3446 3447 if expression: 3448 this.set(key, expression) 3449 if key == "limit": 3450 offset = expression.args.pop("offset", None) 3451 3452 if offset: 3453 offset = exp.Offset(expression=offset) 3454 this.set("offset", offset) 3455 3456 limit_by_expressions = expression.expressions 3457 expression.set("expressions", None) 3458 offset.set("expressions", limit_by_expressions) 3459 continue 3460 break 3461 3462 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3463 this = self._implicit_unnests_to_explicit(this) 3464 3465 return this 3466 3467 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3468 start = self._curr 3469 while self._curr: 3470 self._advance() 3471 3472 end = self._tokens[self._index - 1] 3473 return exp.Hint(expressions=[self._find_sql(start, end)]) 3474 3475 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3476 return self._parse_function_call() 3477 3478 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3479 start_index = self._index 3480 should_fallback_to_string = False 3481 3482 hints = [] 3483 try: 3484 for hint in iter( 3485 lambda: self._parse_csv( 3486 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3487 ), 3488 [], 

    def _parse_hint_body(self) -> t.Optional[exp.Hint]:
        start_index = self._index
        should_fallback_to_string = False

        hints = []
        try:
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_hint_function_call() or self._parse_var(upper=True),
                ),
                [],
            ):
                hints.extend(hint)
        except ParseError:
            should_fallback_to_string = True

        if should_fallback_to_string or self._curr:
            self._retreat(start_index)
            return self._parse_hint_fallback_to_string()

        return self.expression(exp.Hint, expressions=hints)

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT) and self._prev_comments:
            return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        ordinality: t.Optional[bool] = None

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
            ordinality=ordinality,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_using_identifiers(self) -> t.List[exp.Expression]:
        def _parse_column_as_identifier() -> t.Optional[exp.Expression]:
            this = self._parse_column()
            if isinstance(this, exp.Column):
                return this.this
            return this

        return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)
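
    # Illustrative usage (assumes the public sqlglot API): T-SQL's CROSS APPLY
    # and OUTER APPLY are recognized by `_parse_lateral`, with `cross_apply`
    # set to True and False respectively, so APPLY can round-trip as LATERAL:
    #
    #     import sqlglot
    #
    #     join = sqlglot.parse_one(
    #         "SELECT * FROM t CROSS APPLY fn(t.x)", read="tsql"
    #     ).args["joins"][0]
    #     # join.this should be an exp.Lateral with cross_apply=True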

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            table = self._try_parse(self._parse_table)
            if table:
                return self.expression(exp.Join, this=table)
            return None

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}
        if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA):
            kwargs["expressions"] = self._parse_csv(
                lambda: self._parse_table(parse_bracket=parse_bracket)
            )

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        elif (
            not (outer_apply or cross_apply)
            and not isinstance(kwargs["this"], exp.Unnest)
            and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY))
        ):
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_assignment()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_using_identifiers()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        kwargs["pivots"] = self._parse_pivots()

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        this = self._parse_assignment()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        on = self._parse_field() if self._match(TokenType.ON) else None

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
            on=on,
        )
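
    # Illustrative usage (assumes the public sqlglot API): CREATE INDEX
    # statements funnel through `_parse_index` / `_parse_index_params`:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     idx = sqlglot.parse_one("CREATE UNIQUE INDEX idx ON t (a, b)").find(exp.Index)
    #     # idx.args["unique"] is True; the column list lives on the
    #     # exp.IndexParameters node under idx.args["params"]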

    def _parse_index(
        self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
    ) -> t.Optional[exp.Index]:
        if index or anonymous:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_set((TokenType.INDEX, TokenType.KEY))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        table = self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
        )

        changes = self._parse_changes()
        if changes:
            table.set("changes", changes)

        at_before = self._parse_historical_data()
        if at_before:
            table.set("when", at_before)

        pivots = self._parse_pivots()
        if pivots:
            table.set("pivots", pivots)

        return table

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv(
            self._parse_table
        )
        rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or rows_from
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this
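
    # Illustrative usage (assumes the public sqlglot API): dotted names parsed
    # by `_parse_table_parts` expose their parts on the resulting exp.Table:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     tbl = sqlglot.parse_one("SELECT * FROM c.d.t").find(exp.Table)
    #     assert (tbl.catalog, tbl.db, tbl.name) == ("c", "d", "t")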

    def _parse_version(self) -> t.Optional[exp.Version]:
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]:
        # https://docs.snowflake.com/en/sql-reference/constructs/at-before
        index = self._index
        historical_data = None
        if self._match_texts(self.HISTORICAL_DATA_PREFIX):
            this = self._prev.text.upper()
            kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.HISTORICAL_DATA_KIND)
                and self._prev.text.upper()
            )
            expression = self._match(TokenType.FARROW) and self._parse_bitwise()

            if expression:
                self._match_r_paren()
                historical_data = self.expression(
                    exp.HistoricalData, this=this, kind=kind, expression=expression
                )
            else:
                self._retreat(index)

        return historical_data

    def _parse_changes(self) -> t.Optional[exp.Changes]:
        if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"):
            return None

        information = self._parse_var(any_token=True)
        self._match_r_paren()

        return self.expression(
            exp.Changes,
            information=information,
            at_before=self._parse_historical_data(),
            end=self._parse_historical_data(),
        )

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not (
            # ClickHouse's `FORMAT Values` is equivalent to `VALUES`
            self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES")
        ):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )
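
    # Illustrative usage (assumes the public sqlglot API): BigQuery's
    # `WITH OFFSET` lands in the Unnest's `offset` arg:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     u = sqlglot.parse_one(
    #         "SELECT * FROM UNNEST(a) WITH OFFSET AS pos", read="bigquery"
    #     ).find(exp.Unnest)
    #     # u.args["offset"] should be the identifier `pos`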

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        if not method and self.DEFAULT_SAMPLING_METHOD:
            method = exp.var(self.DEFAULT_SAMPLING_METHOD)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        return iter(self._parse_join, None)

    def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]:
        if not self._match(TokenType.INTO):
            return None

        return self.expression(
            exp.UnpivotColumns,
            this=self._match_text_seq("NAME") and self._parse_column(),
            expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column),
        )
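
    # Illustrative usage (assumes the public sqlglot API): Hive-style bucket
    # sampling exercises the BUCKET branch of `_parse_table_sample`:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     ts = sqlglot.parse_one(
    #         "SELECT * FROM t TABLESAMPLE (BUCKET 1 OUT OF 4 ON x)", read="hive"
    #     ).find(exp.TableSample)
    #     # expected args: bucket_numerator=1, bucket_denominator=4, bucket_field=x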

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match(TokenType.IN):
                # PIVOT ... ON col IN (row_val1, row_val2)
                return self._parse_in(this)
            if self._match(TokenType.ALIAS, advance=False):
                # UNPIVOT ... ON (col1, col2, col3) AS row_val
                return self._parse_alias(this)

            return this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        into = self._parse_unpivot_columns()
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()

        return self.expression(
            exp.Pivot,
            this=this,
            expressions=expressions,
            using=using,
            group=group,
            unpivot=is_unpivot,
            into=into,
        )

    def _parse_pivot_in(self) -> exp.In:
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_select_or_expression()

            self._match(TokenType.ALIAS)
            alias = self._parse_bitwise()
            if alias:
                if isinstance(alias, exp.Column) and not alias.db:
                    alias = alias.this
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        if self._match(TokenType.ANY):
            exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order()))
        else:
            exprs = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=exprs)
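
    # Illustrative usage (assumes the public sqlglot API): a Snowflake-style
    # PIVOT is parsed by `_parse_pivot` below, with each `FOR ... IN (...)`
    # clause produced by `_parse_pivot_in`:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     p = sqlglot.parse_one(
    #         "SELECT * FROM t PIVOT(SUM(v) FOR year IN (2000, 2010))",
    #         read="snowflake",
    #     ).find(exp.Pivot)
    #     # p.expressions holds the aggregations, p.args["fields"] the IN lists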

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        fields = []
        while True:
            field = self._try_parse(self._parse_pivot_in)
            if not field:
                break
            fields.append(field)

        default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
            self._parse_bitwise
        )

        group = self._parse_group()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            fields=fields,
            unpivot=unpivot,
            include_nulls=include_nulls,
            default_on_null=default_on_null,
            group=group,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            all_fields = []
            for pivot_field in pivot.fields:
                pivot_field_expressions = pivot_field.expressions

                # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case.
                if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny):
                    continue

                all_fields.append(
                    [
                        fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                        for fld in pivot_field_expressions
                    ]
                )

            if all_fields:
                if names:
                    all_fields.append(names)

                # Generate all possible combinations of the pivot columns
                # e.g. PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US'))
                # generates the product between [[2000, 2010], ['NL', 'US'], ['total']]
                for fld_parts_tuple in itertools.product(*all_fields):
                    fld_parts = list(fld_parts_tuple)

                    if names and self.PREFIXED_PIVOT_COLUMNS:
                        # Move the "name" to the front of the list
                        fld_parts.insert(0, fld_parts.pop(-1))

                    columns.append(exp.to_identifier("_".join(fld_parts)))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations if agg.alias]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        while True:
            index = self._index

            elements["expressions"].extend(
                self._parse_csv(
                    lambda: None
                    if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False)
                    else self._parse_assignment()
                )
            )

            before_with_index = self._index
            with_prefix = self._match(TokenType.WITH)

            if self._match(TokenType.ROLLUP):
                elements["rollup"].append(
                    self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix)
                )
            elif self._match(TokenType.CUBE):
                elements["cube"].append(
                    self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix)
                )
            elif self._match(TokenType.GROUPING_SETS):
                elements["grouping_sets"].append(
                    self.expression(
                        exp.GroupingSets,
                        expressions=self._parse_wrapped_csv(self._parse_grouping_set),
                    )
                )
            elif self._match_text_seq("TOTALS"):
                elements["totals"] = True  # type: ignore

            if before_with_index <= self._index <= before_with_index + 1:
                self._retreat(before_with_index)
                break

            if index == self._index:
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E:
        return self.expression(
            kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column)
        )
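
    # Illustrative usage (assumes the public sqlglot API): ROLLUP, CUBE and
    # GROUPING SETS accumulate into separate args on exp.Group:
    #
    #     import sqlglot
    #
    #     g = sqlglot.parse_one("SELECT a, SUM(b) FROM t GROUP BY ROLLUP (a)").args["group"]
    #     # g.args["rollup"] should be a list containing one exp.Rollup node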

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_assignment())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_assignment())

    def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]:
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_assignment()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")
        return connect

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_assignment()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        connect = self._parse_connect_with_prior()

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_assignment()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> t.Optional[exp.Expression]:
        this = self._parse_id_var(any_token=True)
        if self._match(TokenType.ALIAS):
            this = self.expression(exp.Alias, alias=this, this=self._parse_assignment())
        return this

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))
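
    # Illustrative usage (assumes the public sqlglot API): Oracle hierarchical
    # queries parse into exp.Connect, with PRIOR handled via the temporary
    # NO_PAREN_FUNCTION_PARSERS hook registered in `_parse_connect_with_prior`:
    #
    #     import sqlglot
    #
    #     q = sqlglot.parse_one(
    #         "SELECT employee_id FROM emp START WITH manager_id IS NULL "
    #         "CONNECT BY PRIOR employee_id = manager_id",
    #         read="oracle",
    #     )
    #     # q.args["connect"] should be exp.Connect(start=..., connect=...)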

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        this = parse_method() if parse_method else self._parse_assignment()
        if not this:
            return None

        if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL:
            this = exp.var("ALL")

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                    "interpolate": self._parse_interpolate(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit_options(self) -> exp.LimitOptions:
        percent = self._match(TokenType.PERCENT)
        rows = self._match_set((TokenType.ROW, TokenType.ROWS))
        self._match_text_seq("ONLY")
        with_ties = self._match_text_seq("WITH", "TIES")
        return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties)

    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()

                limit_options = self._parse_limit_options()
            else:
                limit_options = None
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                limit_options=limit_options,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                limit_options=self._parse_limit_options(),
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _can_parse_limit_or_offset(self) -> bool:
        if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False):
            return False

        index = self._index
        result = bool(
            self._try_parse(self._parse_limit, retreat=True)
            or self._try_parse(self._parse_offset, retreat=True)
        )
        self._retreat(index)
        return result

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)
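
    # Illustrative usage (assumes the public sqlglot API): MySQL's comma form
    # hits the COMMA branch of `_parse_limit`; the leading number is stored as
    # the Limit's `offset` arg and later hoisted into exp.Offset by
    # `_parse_query_modifiers`:
    #
    #     import sqlglot
    #
    #     q = sqlglot.parse_one("SELECT * FROM t LIMIT 5, 10", read="mysql")
    #     # q.args["limit"] wraps 10 and q.args["offset"] wraps 5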

    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def parse_set_operation(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        start = self._index
        _, side_token, kind_token = self._parse_join_parts()

        side = side_token.text if side_token else None
        kind = kind_token.text if kind_token else None

        if not self._match_set(self.SET_OPERATIONS):
            self._retreat(start)
            return None

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            operation: t.Type[exp.SetOperation] = exp.Union
        elif token_type == TokenType.EXCEPT:
            operation = exp.Except
        else:
            operation = exp.Intersect

        comments = self._prev.comments

        if self._match(TokenType.DISTINCT):
            distinct: t.Optional[bool] = True
        elif self._match(TokenType.ALL):
            distinct = False
        else:
            distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
            if distinct is None:
                self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

        by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq(
            "STRICT", "CORRESPONDING"
        )
        if self._match_text_seq("CORRESPONDING"):
            by_name = True
            if not side and not kind:
                kind = "INNER"

        on_column_list = None
        if by_name and self._match_texts(("ON", "BY")):
            on_column_list = self._parse_wrapped_csv(self._parse_column)

        expression = self._parse_select(nested=True, parse_set_operation=False)

        return self.expression(
            operation,
            comments=comments,
            this=this,
            distinct=distinct,
            by_name=by_name,
            expression=expression,
            side=side,
            kind=kind,
            on=on_column_list,
        )

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        while this:
            setop = self.parse_set_operation(this)
            if not setop:
                break
            this = setop

        if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP:
            expression = this.expression

            if expression:
                for arg in self.SET_OP_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_assignment())
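
    # Illustrative usage (assumes the public sqlglot API): the DISTINCT/ALL
    # resolution above lands on the set-operation node's `distinct` arg:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     u = sqlglot.parse_one("SELECT 1 UNION SELECT 2")
    #     # isinstance(u, exp.Union) and u.args["distinct"] is True;
    #     # `UNION ALL` would set distinct=False instead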

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        this = self._parse_disjunction()
        if not this and self._next and self._next.token_type in self.ASSIGNMENT:
            # This allows us to parse <non-identifier token> := <expr>
            this = exp.column(
                t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text)
            )

        while self._match_set(self.ASSIGNMENT):
            if isinstance(this, exp.Column) and len(this.parts) == 1:
                this = this.this

            this = self.expression(
                self.ASSIGNMENT[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_assignment(),
            )

        return this

    def _parse_disjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION)

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self._negate_range(this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not this:
            return this

        return self.expression(exp.Not, this=this)

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        if self._match(TokenType.JSON):
            kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper()

            if self._match_text_seq("WITH"):
                _with = True
            elif self._match_text_seq("WITHOUT"):
                _with = False
            else:
                _with = None

            unique = self._match(TokenType.UNIQUE)
            self._match_text_seq("KEYS")
            expression: t.Optional[exp.Expression] = self.expression(
                exp.JSON, **{"this": kind, "with": _with, "unique": unique}
            )
        else:
            expression = self._parse_primary() or self._parse_null()
            if not expression:
                self._retreat(index)
                return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_column())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())
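
    # Illustrative usage (assumes the public sqlglot API):
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     sqlglot.parse_one("x BETWEEN 1 AND 10")  # exp.Between(this=x, low=1, high=10)
    #
    #     e = sqlglot.parse_one("x IN (SELECT y FROM t)")
    #     # a single query operand is wrapped in a subquery and stored under
    #     # e.args["query"] rather than in the generic `expressions` list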

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.to_py())
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if parts and unit:
                # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                unit = None
                self._retreat(self._index - 1)

            if len(parts) == 1:
                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var, this=parts[0][1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        interval = self.expression(exp.Interval, this=this, unit=unit)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(
                    exp.Coalesce, this=this, expressions=ensure_list(self._parse_term())
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        this = self._parse_factor()

        while self._match_set(self.TERM):
            klass = self.TERM[self._prev.token_type]
            comments = self._prev_comments
            expression = self._parse_factor()

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Collate):
                expr = this.expression

                # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise
                # fallback to Identifier / Var
                if isinstance(expr, exp.Column) and len(expr.parts) == 1:
                    ident = expr.this
                    if isinstance(ident, exp.Identifier):
                        this.set("expression", ident if ident.quoted else exp.var(ident.name))

        return this

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Div):
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())
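
    # The chain above implements the operator-precedence ladder: assignment ->
    # disjunction -> conjunction -> equality -> comparison -> range -> bitwise
    # -> term -> factor -> exponent -> unary, each level delegating to the next
    # tighter one. Illustrative usage (assumes the public sqlglot API):
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     node = sqlglot.parse_one("1 + 2 * 3")
    #     assert isinstance(node, exp.Add) and isinstance(node.expression, exp.Mul)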

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                this = self._parse_column_ops(this)

                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

            self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]:
        type_name = identifier.name

        while self._match(TokenType.DOT):
            type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

        return exp.DataType.build(type_name, udt=True)

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier, exp.Identifier):
                tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect))

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    this = self._parse_user_defined_type(identifier)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        # https://materialize.com/docs/sql/types/map/
        if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
            key_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.FARROW):
                self._retreat(index)
                return None

            value_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.R_BRACKET):
                self._retreat(index)
                return None

            return exp.DataType(
                this=exp.DataType.Type.MAP,
                expressions=[key_type, value_type],
                nested=True,
                prefix=prefix,
            )

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                if type_token == TokenType.NULLABLE and len(expressions) == 1:
                    this = expressions[0]
                    this.set("nullable", True)
                    self._match_r_paren()
                    return this
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR, TokenType.ANY)
                )
                if not func_or_ident:
                    return None
                expressions = [func_or_ident]
                if self._match(TokenType.COMMA):
                    expressions.extend(
                        self._parse_csv(
                            lambda: self._parse_types(
                                check_func=check_func,
                                schema=schema,
                                allow_identifiers=allow_identifiers,
                            )
                        )
                    )
            else:
                expressions = self._parse_csv(self._parse_type_size)

            # https://docs.snowflake.com/en/sql-reference/data-types-vector
            if type_token == TokenType.VECTOR and len(expressions) == 2:
                expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_assignment)
                if not values and is_struct:
                    values = None
self._retreat(self._index - 1) 5264 else: 5265 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5266 5267 if type_token in self.TIMESTAMPS: 5268 if self._match_text_seq("WITH", "TIME", "ZONE"): 5269 maybe_func = False 5270 tz_type = ( 5271 exp.DataType.Type.TIMETZ 5272 if type_token in self.TIMES 5273 else exp.DataType.Type.TIMESTAMPTZ 5274 ) 5275 this = exp.DataType(this=tz_type, expressions=expressions) 5276 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5277 maybe_func = False 5278 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5279 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5280 maybe_func = False 5281 elif type_token == TokenType.INTERVAL: 5282 unit = self._parse_var(upper=True) 5283 if unit: 5284 if self._match_text_seq("TO"): 5285 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5286 5287 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5288 else: 5289 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5290 elif type_token == TokenType.VOID: 5291 this = exp.DataType(this=exp.DataType.Type.NULL) 5292 5293 if maybe_func and check_func: 5294 index2 = self._index 5295 peek = self._parse_string() 5296 5297 if not peek: 5298 self._retreat(index) 5299 return None 5300 5301 self._retreat(index2) 5302 5303 if not this: 5304 if self._match_text_seq("UNSIGNED"): 5305 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5306 if not unsigned_type_token: 5307 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5308 5309 type_token = unsigned_type_token or type_token 5310 5311 this = exp.DataType( 5312 this=exp.DataType.Type[type_token.value], 5313 expressions=expressions, 5314 nested=nested, 5315 prefix=prefix, 5316 ) 5317 5318 # Empty arrays/structs are allowed 5319 if values is not None: 5320 cls = exp.Struct if is_struct else exp.Array 5321 this = exp.cast(cls(expressions=values), this, copy=False) 5322 5323 elif expressions: 5324 this.set("expressions", expressions) 5325 5326 # https://materialize.com/docs/sql/types/list/#type-name 5327 while self._match(TokenType.LIST): 5328 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5329 5330 index = self._index 5331 5332 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5333 matched_array = self._match(TokenType.ARRAY) 5334 5335 while self._curr: 5336 datatype_token = self._prev.token_type 5337 matched_l_bracket = self._match(TokenType.L_BRACKET) 5338 5339 if (not matched_l_bracket and not matched_array) or ( 5340 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5341 ): 5342 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5343 # not to be confused with the fixed size array parsing 5344 break 5345 5346 matched_array = False 5347 values = self._parse_csv(self._parse_assignment) or None 5348 if ( 5349 values 5350 and not schema 5351 and ( 5352 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5353 ) 5354 ): 5355 # Retreating here means that we should not parse the following values as part of the data type, e.g. 
in DuckDB 5356 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5357 self._retreat(index) 5358 break 5359 5360 this = exp.DataType( 5361 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5362 ) 5363 self._match(TokenType.R_BRACKET) 5364 5365 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5366 converter = self.TYPE_CONVERTERS.get(this.this) 5367 if converter: 5368 this = converter(t.cast(exp.DataType, this)) 5369 5370 return this 5371 5372 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5373 index = self._index 5374 5375 if ( 5376 self._curr 5377 and self._next 5378 and self._curr.token_type in self.TYPE_TOKENS 5379 and self._next.token_type in self.TYPE_TOKENS 5380 ): 5381 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5382 # type token. Without this, the list will be parsed as a type and we'll eventually crash 5383 this = self._parse_id_var() 5384 else: 5385 this = ( 5386 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5387 or self._parse_id_var() 5388 ) 5389 5390 self._match(TokenType.COLON) 5391 5392 if ( 5393 type_required 5394 and not isinstance(this, exp.DataType) 5395 and not self._match_set(self.TYPE_TOKENS, advance=False) 5396 ): 5397 self._retreat(index) 5398 return self._parse_types() 5399 5400 return self._parse_column_def(this) 5401 5402 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5403 if not self._match_text_seq("AT", "TIME", "ZONE"): 5404 return this 5405 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5406 5407 def _parse_column(self) -> t.Optional[exp.Expression]: 5408 this = self._parse_column_reference() 5409 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5410 5411 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5412 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5413 5414 return column 5415 5416 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5417 this = self._parse_field() 5418 if ( 5419 not this 5420 and self._match(TokenType.VALUES, advance=False) 5421 and self.VALUES_FOLLOWED_BY_PAREN 5422 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5423 ): 5424 this = self._parse_id_var() 5425 5426 if isinstance(this, exp.Identifier): 5427 # We bubble up comments from the Identifier to the Column 5428 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5429 5430 return this 5431 5432 def _parse_colon_as_variant_extract( 5433 self, this: t.Optional[exp.Expression] 5434 ) -> t.Optional[exp.Expression]: 5435 casts = [] 5436 json_path = [] 5437 escape = None 5438 5439 while self._match(TokenType.COLON): 5440 start_index = self._index 5441 5442 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5443 path = self._parse_column_ops( 5444 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5445 ) 5446 5447 # The cast :: operator has a lower precedence than the extraction operator :, so 5448 # we rearrange the AST appropriately to avoid casting the JSON path 5449 while isinstance(path, exp.Cast): 5450 casts.append(path.to) 5451 path = path.this 5452 5453 if casts: 5454 dcolon_offset = next( 5455 i 5456 for i, t in enumerate(self._tokens[start_index:]) 5457 if t.token_type == TokenType.DCOLON 
5458 ) 5459 end_token = self._tokens[start_index + dcolon_offset - 1] 5460 else: 5461 end_token = self._prev 5462 5463 if path: 5464 # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as 5465 # it'll roundtrip to a string literal in GET_PATH 5466 if isinstance(path, exp.Identifier) and path.quoted: 5467 escape = True 5468 5469 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5470 5471 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5472 # Databricks transforms it back to the colon/dot notation 5473 if json_path: 5474 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5475 5476 if json_path_expr: 5477 json_path_expr.set("escape", escape) 5478 5479 this = self.expression( 5480 exp.JSONExtract, 5481 this=this, 5482 expression=json_path_expr, 5483 variant_extract=True, 5484 ) 5485 5486 while casts: 5487 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5488 5489 return this 5490 5491 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5492 return self._parse_types() 5493 5494 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5495 this = self._parse_bracket(this) 5496 5497 while self._match_set(self.COLUMN_OPERATORS): 5498 op_token = self._prev.token_type 5499 op = self.COLUMN_OPERATORS.get(op_token) 5500 5501 if op_token in (TokenType.DCOLON, TokenType.DOTCOLON): 5502 field = self._parse_dcolon() 5503 if not field: 5504 self.raise_error("Expected type") 5505 elif op and self._curr: 5506 field = self._parse_column_reference() or self._parse_bracket() 5507 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5508 field = self._parse_column_ops(field) 5509 else: 5510 field = self._parse_field(any_token=True, anonymous_func=True) 5511 5512 # Function calls can be qualified, e.g., x.y.FOO() 5513 # This converts the final AST to a series of Dots leading to the function call 5514 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5515 if isinstance(field, (exp.Func, exp.Window)) and this: 5516 this = this.transform( 5517 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 5518 ) 5519 5520 if op: 5521 this = op(self, this, field) 5522 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5523 this = self.expression( 5524 exp.Column, 5525 comments=this.comments, 5526 this=field, 5527 table=this.this, 5528 db=this.args.get("table"), 5529 catalog=this.args.get("db"), 5530 ) 5531 elif isinstance(field, exp.Window): 5532 # Move the exp.Dot's to the window's function 5533 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5534 field.set("this", window_func) 5535 this = field 5536 else: 5537 this = self.expression(exp.Dot, this=this, expression=field) 5538 5539 if field and field.comments: 5540 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5541 5542 this = self._parse_bracket(this) 5543 5544 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5545 5546 def _parse_primary(self) -> t.Optional[exp.Expression]: 5547 if self._match_set(self.PRIMARY_PARSERS): 5548 token_type = self._prev.token_type 5549 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5550 5551 if token_type == TokenType.STRING: 5552 expressions = [primary] 5553 while self._match(TokenType.STRING): 5554 
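# ---------------------------------------------------------------------------
# Illustrative aside: _parse_colon_as_variant_extract above turns Snowflake's
# colon notation into an exp.JSONExtract flagged with variant_extract=True,
# which the generator can later render through GET_PATH or back to colon/dot
# notation. A small sketch (the SQL string is assumed for demonstration):
#
#   >>> import sqlglot
#   >>> from sqlglot import exp
#   >>> node = sqlglot.parse_one(
#   ...     "SELECT v:a.b FROM t", read="snowflake"
#   ... ).find(exp.JSONExtract)
#   >>> node.args.get("variant_extract")
#   True
# ---------------------------------------------------------------------------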
expressions.append(exp.Literal.string(self._prev.text)) 5555 5556 if len(expressions) > 1: 5557 return self.expression(exp.Concat, expressions=expressions) 5558 5559 return primary 5560 5561 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5562 return exp.Literal.number(f"0.{self._prev.text}") 5563 5564 if self._match(TokenType.L_PAREN): 5565 comments = self._prev_comments 5566 query = self._parse_select() 5567 5568 if query: 5569 expressions = [query] 5570 else: 5571 expressions = self._parse_expressions() 5572 5573 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5574 5575 if not this and self._match(TokenType.R_PAREN, advance=False): 5576 this = self.expression(exp.Tuple) 5577 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5578 this = self._parse_subquery(this=this, parse_alias=False) 5579 elif isinstance(this, exp.Subquery): 5580 this = self._parse_subquery( 5581 this=self._parse_set_operations(this), parse_alias=False 5582 ) 5583 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5584 this = self.expression(exp.Tuple, expressions=expressions) 5585 else: 5586 this = self.expression(exp.Paren, this=this) 5587 5588 if this: 5589 this.add_comments(comments) 5590 5591 self._match_r_paren(expression=this) 5592 return this 5593 5594 return None 5595 5596 def _parse_field( 5597 self, 5598 any_token: bool = False, 5599 tokens: t.Optional[t.Collection[TokenType]] = None, 5600 anonymous_func: bool = False, 5601 ) -> t.Optional[exp.Expression]: 5602 if anonymous_func: 5603 field = ( 5604 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5605 or self._parse_primary() 5606 ) 5607 else: 5608 field = self._parse_primary() or self._parse_function( 5609 anonymous=anonymous_func, any_token=any_token 5610 ) 5611 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5612 5613 def _parse_function( 5614 self, 5615 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5616 anonymous: bool = False, 5617 optional_parens: bool = True, 5618 any_token: bool = False, 5619 ) -> t.Optional[exp.Expression]: 5620 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5621 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5622 fn_syntax = False 5623 if ( 5624 self._match(TokenType.L_BRACE, advance=False) 5625 and self._next 5626 and self._next.text.upper() == "FN" 5627 ): 5628 self._advance(2) 5629 fn_syntax = True 5630 5631 func = self._parse_function_call( 5632 functions=functions, 5633 anonymous=anonymous, 5634 optional_parens=optional_parens, 5635 any_token=any_token, 5636 ) 5637 5638 if fn_syntax: 5639 self._match(TokenType.R_BRACE) 5640 5641 return func 5642 5643 def _parse_function_call( 5644 self, 5645 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5646 anonymous: bool = False, 5647 optional_parens: bool = True, 5648 any_token: bool = False, 5649 ) -> t.Optional[exp.Expression]: 5650 if not self._curr: 5651 return None 5652 5653 comments = self._curr.comments 5654 token = self._curr 5655 token_type = self._curr.token_type 5656 this = self._curr.text 5657 upper = this.upper() 5658 5659 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5660 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5661 self._advance() 5662 return self._parse_window(parser(self)) 5663 5664 if not self._next or self._next.token_type != TokenType.L_PAREN: 5665 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5666 self._advance() 5667 return 
self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5668 5669 return None 5670 5671 if any_token: 5672 if token_type in self.RESERVED_TOKENS: 5673 return None 5674 elif token_type not in self.FUNC_TOKENS: 5675 return None 5676 5677 self._advance(2) 5678 5679 parser = self.FUNCTION_PARSERS.get(upper) 5680 if parser and not anonymous: 5681 this = parser(self) 5682 else: 5683 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5684 5685 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5686 this = self.expression( 5687 subquery_predicate, comments=comments, this=self._parse_select() 5688 ) 5689 self._match_r_paren() 5690 return this 5691 5692 if functions is None: 5693 functions = self.FUNCTIONS 5694 5695 function = functions.get(upper) 5696 known_function = function and not anonymous 5697 5698 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5699 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5700 5701 post_func_comments = self._curr and self._curr.comments 5702 if known_function and post_func_comments: 5703 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5704 # call we'll construct it as exp.Anonymous, even if it's "known" 5705 if any( 5706 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5707 for comment in post_func_comments 5708 ): 5709 known_function = False 5710 5711 if alias and known_function: 5712 args = self._kv_to_prop_eq(args) 5713 5714 if known_function: 5715 func_builder = t.cast(t.Callable, function) 5716 5717 if "dialect" in func_builder.__code__.co_varnames: 5718 func = func_builder(args, dialect=self.dialect) 5719 else: 5720 func = func_builder(args) 5721 5722 func = self.validate_expression(func, args) 5723 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5724 func.meta["name"] = this 5725 5726 this = func 5727 else: 5728 if token_type == TokenType.IDENTIFIER: 5729 this = exp.Identifier(this=this, quoted=True).update_positions(token) 5730 5731 this = self.expression(exp.Anonymous, this=this, expressions=args) 5732 this = this.update_positions(token) 5733 5734 if isinstance(this, exp.Expression): 5735 this.add_comments(comments) 5736 5737 self._match_r_paren(this) 5738 return self._parse_window(this) 5739 5740 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5741 return expression 5742 5743 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5744 transformed = [] 5745 5746 for index, e in enumerate(expressions): 5747 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5748 if isinstance(e, exp.Alias): 5749 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5750 5751 if not isinstance(e, exp.PropertyEQ): 5752 e = self.expression( 5753 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5754 ) 5755 5756 if isinstance(e.this, exp.Column): 5757 e.this.replace(e.this.this) 5758 else: 5759 e = self._to_prop_eq(e, index) 5760 5761 transformed.append(e) 5762 5763 return transformed 5764 5765 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5766 return self._parse_statement() 5767 5768 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5769 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5770 5771 def _parse_user_defined_function( 5772 self, kind: t.Optional[TokenType] = None 5773 ) -> t.Optional[exp.Expression]: 5774 this = self._parse_table_parts(schema=True) 5775 5776 if 
not self._match(TokenType.L_PAREN): 5777 return this 5778 5779 expressions = self._parse_csv(self._parse_function_parameter) 5780 self._match_r_paren() 5781 return self.expression( 5782 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5783 ) 5784 5785 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5786 literal = self._parse_primary() 5787 if literal: 5788 return self.expression(exp.Introducer, this=token.text, expression=literal) 5789 5790 return self._identifier_expression(token) 5791 5792 def _parse_session_parameter(self) -> exp.SessionParameter: 5793 kind = None 5794 this = self._parse_id_var() or self._parse_primary() 5795 5796 if this and self._match(TokenType.DOT): 5797 kind = this.name 5798 this = self._parse_var() or self._parse_primary() 5799 5800 return self.expression(exp.SessionParameter, this=this, kind=kind) 5801 5802 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5803 return self._parse_id_var() 5804 5805 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5806 index = self._index 5807 5808 if self._match(TokenType.L_PAREN): 5809 expressions = t.cast( 5810 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5811 ) 5812 5813 if not self._match(TokenType.R_PAREN): 5814 self._retreat(index) 5815 else: 5816 expressions = [self._parse_lambda_arg()] 5817 5818 if self._match_set(self.LAMBDAS): 5819 return self.LAMBDAS[self._prev.token_type](self, expressions) 5820 5821 self._retreat(index) 5822 5823 this: t.Optional[exp.Expression] 5824 5825 if self._match(TokenType.DISTINCT): 5826 this = self.expression( 5827 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5828 ) 5829 else: 5830 this = self._parse_select_or_expression(alias=alias) 5831 5832 return self._parse_limit( 5833 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5834 ) 5835 5836 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5837 index = self._index 5838 if not self._match(TokenType.L_PAREN): 5839 return this 5840 5841 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 5842 # expr can be of both types 5843 if self._match_set(self.SELECT_START_TOKENS): 5844 self._retreat(index) 5845 return this 5846 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5847 self._match_r_paren() 5848 return self.expression(exp.Schema, this=this, expressions=args) 5849 5850 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5851 return self._parse_column_def(self._parse_field(any_token=True)) 5852 5853 def _parse_column_def( 5854 self, this: t.Optional[exp.Expression], computed_column: bool = True 5855 ) -> t.Optional[exp.Expression]: 5856 # column defs are not really columns, they're identifiers 5857 if isinstance(this, exp.Column): 5858 this = this.this 5859 5860 if not computed_column: 5861 self._match(TokenType.ALIAS) 5862 5863 kind = self._parse_types(schema=True) 5864 5865 if self._match_text_seq("FOR", "ORDINALITY"): 5866 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5867 5868 constraints: t.List[exp.Expression] = [] 5869 5870 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5871 ("ALIAS", "MATERIALIZED") 5872 ): 5873 persisted = self._prev.text.upper() == "MATERIALIZED" 5874 constraint_kind = exp.ComputedColumnConstraint( 5875 this=self._parse_assignment(), 5876 persisted=persisted or self._match_text_seq("PERSISTED"), 5877 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5878 ) 5879 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5880 elif ( 5881 kind 5882 and self._match(TokenType.ALIAS, advance=False) 5883 and ( 5884 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 5885 or (self._next and self._next.token_type == TokenType.L_PAREN) 5886 ) 5887 ): 5888 self._advance() 5889 constraints.append( 5890 self.expression( 5891 exp.ColumnConstraint, 5892 kind=exp.ComputedColumnConstraint( 5893 this=self._parse_disjunction(), 5894 persisted=self._match_texts(("STORED", "VIRTUAL")) 5895 and self._prev.text.upper() == "STORED", 5896 ), 5897 ) 5898 ) 5899 5900 while True: 5901 constraint = self._parse_column_constraint() 5902 if not constraint: 5903 break 5904 constraints.append(constraint) 5905 5906 if not kind and not constraints: 5907 return this 5908 5909 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5910 5911 def _parse_auto_increment( 5912 self, 5913 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5914 start = None 5915 increment = None 5916 5917 if self._match(TokenType.L_PAREN, advance=False): 5918 args = self._parse_wrapped_csv(self._parse_bitwise) 5919 start = seq_get(args, 0) 5920 increment = seq_get(args, 1) 5921 elif self._match_text_seq("START"): 5922 start = self._parse_bitwise() 5923 self._match_text_seq("INCREMENT") 5924 increment = self._parse_bitwise() 5925 5926 if start and increment: 5927 return exp.GeneratedAsIdentityColumnConstraint( 5928 start=start, increment=increment, this=False 5929 ) 5930 5931 return exp.AutoIncrementColumnConstraint() 5932 5933 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5934 if not self._match_text_seq("REFRESH"): 5935 self._retreat(self._index - 1) 5936 return None 5937 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5938 5939 def _parse_compress(self) -> exp.CompressColumnConstraint: 5940 if self._match(TokenType.L_PAREN, advance=False): 5941 return self.expression( 5942 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 
5943 ) 5944 5945 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5946 5947 def _parse_generated_as_identity( 5948 self, 5949 ) -> ( 5950 exp.GeneratedAsIdentityColumnConstraint 5951 | exp.ComputedColumnConstraint 5952 | exp.GeneratedAsRowColumnConstraint 5953 ): 5954 if self._match_text_seq("BY", "DEFAULT"): 5955 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5956 this = self.expression( 5957 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5958 ) 5959 else: 5960 self._match_text_seq("ALWAYS") 5961 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5962 5963 self._match(TokenType.ALIAS) 5964 5965 if self._match_text_seq("ROW"): 5966 start = self._match_text_seq("START") 5967 if not start: 5968 self._match(TokenType.END) 5969 hidden = self._match_text_seq("HIDDEN") 5970 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5971 5972 identity = self._match_text_seq("IDENTITY") 5973 5974 if self._match(TokenType.L_PAREN): 5975 if self._match(TokenType.START_WITH): 5976 this.set("start", self._parse_bitwise()) 5977 if self._match_text_seq("INCREMENT", "BY"): 5978 this.set("increment", self._parse_bitwise()) 5979 if self._match_text_seq("MINVALUE"): 5980 this.set("minvalue", self._parse_bitwise()) 5981 if self._match_text_seq("MAXVALUE"): 5982 this.set("maxvalue", self._parse_bitwise()) 5983 5984 if self._match_text_seq("CYCLE"): 5985 this.set("cycle", True) 5986 elif self._match_text_seq("NO", "CYCLE"): 5987 this.set("cycle", False) 5988 5989 if not identity: 5990 this.set("expression", self._parse_range()) 5991 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5992 args = self._parse_csv(self._parse_bitwise) 5993 this.set("start", seq_get(args, 0)) 5994 this.set("increment", seq_get(args, 1)) 5995 5996 self._match_r_paren() 5997 5998 return this 5999 6000 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 6001 self._match_text_seq("LENGTH") 6002 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 6003 6004 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 6005 if self._match_text_seq("NULL"): 6006 return self.expression(exp.NotNullColumnConstraint) 6007 if self._match_text_seq("CASESPECIFIC"): 6008 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 6009 if self._match_text_seq("FOR", "REPLICATION"): 6010 return self.expression(exp.NotForReplicationColumnConstraint) 6011 6012 # Unconsume the `NOT` token 6013 self._retreat(self._index - 1) 6014 return None 6015 6016 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 6017 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 6018 6019 procedure_option_follows = ( 6020 self._match(TokenType.WITH, advance=False) 6021 and self._next 6022 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6023 ) 6024 6025 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6026 return self.expression( 6027 exp.ColumnConstraint, 6028 this=this, 6029 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 6030 ) 6031 6032 return this 6033 6034 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6035 if not self._match(TokenType.CONSTRAINT): 6036 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6037 6038 return self.expression( 6039 exp.Constraint, 6040 this=self._parse_id_var(), 6041 expressions=self._parse_unnamed_constraints(), 6042 ) 6043 
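# ---------------------------------------------------------------------------
# Illustrative aside: how the column-constraint machinery above surfaces in
# the public API. The DDL below and the "postgres" dialect are assumptions for
# demonstration; _parse_generated_as_identity handles the parenthesized
# START WITH / INCREMENT BY options.
#
#   >>> import sqlglot
#   >>> from sqlglot import exp
#   >>> ddl = sqlglot.parse_one(
#   ...     "CREATE TABLE t (id INT GENERATED ALWAYS AS IDENTITY "
#   ...     "(START WITH 1 INCREMENT BY 2))",
#   ...     read="postgres",
#   ... )
#   >>> ident = ddl.find(exp.GeneratedAsIdentityColumnConstraint)
#   >>> ident.args["this"], ident.args["start"].sql(), ident.args["increment"].sql()
#   (True, '1', '2')
# ---------------------------------------------------------------------------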
6044 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6045 constraints = [] 6046 while True: 6047 constraint = self._parse_unnamed_constraint() or self._parse_function() 6048 if not constraint: 6049 break 6050 constraints.append(constraint) 6051 6052 return constraints 6053 6054 def _parse_unnamed_constraint( 6055 self, constraints: t.Optional[t.Collection[str]] = None 6056 ) -> t.Optional[exp.Expression]: 6057 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6058 constraints or self.CONSTRAINT_PARSERS 6059 ): 6060 return None 6061 6062 constraint = self._prev.text.upper() 6063 if constraint not in self.CONSTRAINT_PARSERS: 6064 self.raise_error(f"No parser found for schema constraint {constraint}.") 6065 6066 return self.CONSTRAINT_PARSERS[constraint](self) 6067 6068 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6069 return self._parse_id_var(any_token=False) 6070 6071 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6072 self._match_text_seq("KEY") 6073 return self.expression( 6074 exp.UniqueColumnConstraint, 6075 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6076 this=self._parse_schema(self._parse_unique_key()), 6077 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6078 on_conflict=self._parse_on_conflict(), 6079 options=self._parse_key_constraint_options(), 6080 ) 6081 6082 def _parse_key_constraint_options(self) -> t.List[str]: 6083 options = [] 6084 while True: 6085 if not self._curr: 6086 break 6087 6088 if self._match(TokenType.ON): 6089 action = None 6090 on = self._advance_any() and self._prev.text 6091 6092 if self._match_text_seq("NO", "ACTION"): 6093 action = "NO ACTION" 6094 elif self._match_text_seq("CASCADE"): 6095 action = "CASCADE" 6096 elif self._match_text_seq("RESTRICT"): 6097 action = "RESTRICT" 6098 elif self._match_pair(TokenType.SET, TokenType.NULL): 6099 action = "SET NULL" 6100 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6101 action = "SET DEFAULT" 6102 else: 6103 self.raise_error("Invalid key constraint") 6104 6105 options.append(f"ON {on} {action}") 6106 else: 6107 var = self._parse_var_from_options( 6108 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6109 ) 6110 if not var: 6111 break 6112 options.append(var.name) 6113 6114 return options 6115 6116 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6117 if match and not self._match(TokenType.REFERENCES): 6118 return None 6119 6120 expressions = None 6121 this = self._parse_table(schema=True) 6122 options = self._parse_key_constraint_options() 6123 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6124 6125 def _parse_foreign_key(self) -> exp.ForeignKey: 6126 expressions = ( 6127 self._parse_wrapped_id_vars() 6128 if not self._match(TokenType.REFERENCES, advance=False) 6129 else None 6130 ) 6131 reference = self._parse_references() 6132 on_options = {} 6133 6134 while self._match(TokenType.ON): 6135 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6136 self.raise_error("Expected DELETE or UPDATE") 6137 6138 kind = self._prev.text.lower() 6139 6140 if self._match_text_seq("NO", "ACTION"): 6141 action = "NO ACTION" 6142 elif self._match(TokenType.SET): 6143 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 6144 action = "SET " + self._prev.text.upper() 6145 else: 6146 self._advance() 6147 action = self._prev.text.upper() 6148 6149 on_options[kind] = action 6150 6151 return self.expression( 6152 
exp.ForeignKey, 6153 expressions=expressions, 6154 reference=reference, 6155 options=self._parse_key_constraint_options(), 6156 **on_options, # type: ignore 6157 ) 6158 6159 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 6160 return self._parse_ordered() or self._parse_field() 6161 6162 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 6163 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 6164 self._retreat(self._index - 1) 6165 return None 6166 6167 id_vars = self._parse_wrapped_id_vars() 6168 return self.expression( 6169 exp.PeriodForSystemTimeConstraint, 6170 this=seq_get(id_vars, 0), 6171 expression=seq_get(id_vars, 1), 6172 ) 6173 6174 def _parse_primary_key( 6175 self, wrapped_optional: bool = False, in_props: bool = False 6176 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 6177 desc = ( 6178 self._match_set((TokenType.ASC, TokenType.DESC)) 6179 and self._prev.token_type == TokenType.DESC 6180 ) 6181 6182 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 6183 return self.expression( 6184 exp.PrimaryKeyColumnConstraint, 6185 desc=desc, 6186 options=self._parse_key_constraint_options(), 6187 ) 6188 6189 expressions = self._parse_wrapped_csv( 6190 self._parse_primary_key_part, optional=wrapped_optional 6191 ) 6192 options = self._parse_key_constraint_options() 6193 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 6194 6195 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6196 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 6197 6198 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6199 """ 6200 Parses a datetime column in ODBC format. We parse the column into the corresponding 6201 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 6202 same as we did for `DATE('yyyy-mm-dd')`. 
6203 6204 Reference: 6205 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6206 """ 6207 self._match(TokenType.VAR) 6208 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6209 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6210 if not self._match(TokenType.R_BRACE): 6211 self.raise_error("Expected }") 6212 return expression 6213 6214 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6215 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6216 return this 6217 6218 bracket_kind = self._prev.token_type 6219 if ( 6220 bracket_kind == TokenType.L_BRACE 6221 and self._curr 6222 and self._curr.token_type == TokenType.VAR 6223 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6224 ): 6225 return self._parse_odbc_datetime_literal() 6226 6227 expressions = self._parse_csv( 6228 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6229 ) 6230 6231 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6232 self.raise_error("Expected ]") 6233 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6234 self.raise_error("Expected }") 6235 6236 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6237 if bracket_kind == TokenType.L_BRACE: 6238 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 6239 elif not this: 6240 this = build_array_constructor( 6241 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6242 ) 6243 else: 6244 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6245 if constructor_type: 6246 return build_array_constructor( 6247 constructor_type, 6248 args=expressions, 6249 bracket_kind=bracket_kind, 6250 dialect=self.dialect, 6251 ) 6252 6253 expressions = apply_index_offset( 6254 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6255 ) 6256 this = self.expression( 6257 exp.Bracket, 6258 this=this, 6259 expressions=expressions, 6260 comments=this.pop_comments(), 6261 ) 6262 6263 self._add_comments(this) 6264 return self._parse_bracket(this) 6265 6266 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6267 if self._match(TokenType.COLON): 6268 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6269 return this 6270 6271 def _parse_case(self) -> t.Optional[exp.Expression]: 6272 ifs = [] 6273 default = None 6274 6275 comments = self._prev_comments 6276 expression = self._parse_assignment() 6277 6278 while self._match(TokenType.WHEN): 6279 this = self._parse_assignment() 6280 self._match(TokenType.THEN) 6281 then = self._parse_assignment() 6282 ifs.append(self.expression(exp.If, this=this, true=then)) 6283 6284 if self._match(TokenType.ELSE): 6285 default = self._parse_assignment() 6286 6287 if not self._match(TokenType.END): 6288 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6289 default = exp.column("interval") 6290 else: 6291 self.raise_error("Expected END after CASE", self._prev) 6292 6293 return self.expression( 6294 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6295 ) 6296 6297 def _parse_if(self) -> t.Optional[exp.Expression]: 6298 if self._match(TokenType.L_PAREN): 6299 args = self._parse_csv( 6300 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6301 ) 6302 this = 
self.validate_expression(exp.If.from_arg_list(args), args) 6303 self._match_r_paren() 6304 else: 6305 index = self._index - 1 6306 6307 if self.NO_PAREN_IF_COMMANDS and index == 0: 6308 return self._parse_as_command(self._prev) 6309 6310 condition = self._parse_assignment() 6311 6312 if not condition: 6313 self._retreat(index) 6314 return None 6315 6316 self._match(TokenType.THEN) 6317 true = self._parse_assignment() 6318 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6319 self._match(TokenType.END) 6320 this = self.expression(exp.If, this=condition, true=true, false=false) 6321 6322 return this 6323 6324 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6325 if not self._match_text_seq("VALUE", "FOR"): 6326 self._retreat(self._index - 1) 6327 return None 6328 6329 return self.expression( 6330 exp.NextValueFor, 6331 this=self._parse_column(), 6332 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6333 ) 6334 6335 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6336 this = self._parse_function() or self._parse_var_or_string(upper=True) 6337 6338 if self._match(TokenType.FROM): 6339 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6340 6341 if not self._match(TokenType.COMMA): 6342 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6343 6344 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6345 6346 def _parse_gap_fill(self) -> exp.GapFill: 6347 self._match(TokenType.TABLE) 6348 this = self._parse_table() 6349 6350 self._match(TokenType.COMMA) 6351 args = [this, *self._parse_csv(self._parse_lambda)] 6352 6353 gap_fill = exp.GapFill.from_arg_list(args) 6354 return self.validate_expression(gap_fill, args) 6355 6356 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6357 this = self._parse_assignment() 6358 6359 if not self._match(TokenType.ALIAS): 6360 if self._match(TokenType.COMMA): 6361 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6362 6363 self.raise_error("Expected AS after CAST") 6364 6365 fmt = None 6366 to = self._parse_types() 6367 6368 default = self._match(TokenType.DEFAULT) 6369 if default: 6370 default = self._parse_bitwise() 6371 self._match_text_seq("ON", "CONVERSION", "ERROR") 6372 6373 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6374 fmt_string = self._parse_string() 6375 fmt = self._parse_at_time_zone(fmt_string) 6376 6377 if not to: 6378 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6379 if to.this in exp.DataType.TEMPORAL_TYPES: 6380 this = self.expression( 6381 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6382 this=this, 6383 format=exp.Literal.string( 6384 format_time( 6385 fmt_string.this if fmt_string else "", 6386 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6387 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6388 ) 6389 ), 6390 safe=safe, 6391 ) 6392 6393 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6394 this.set("zone", fmt.args["zone"]) 6395 return this 6396 elif not to: 6397 self.raise_error("Expected TYPE after CAST") 6398 elif isinstance(to, exp.Identifier): 6399 to = exp.DataType.build(to.name, udt=True) 6400 elif to.this == exp.DataType.Type.CHAR: 6401 if self._match(TokenType.CHARACTER_SET): 6402 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6403 6404 return self.expression( 6405 exp.Cast if strict else exp.TryCast, 6406 
this=this, 6407 to=to, 6408 format=fmt, 6409 safe=safe, 6410 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6411 default=default, 6412 ) 6413 6414 def _parse_string_agg(self) -> exp.GroupConcat: 6415 if self._match(TokenType.DISTINCT): 6416 args: t.List[t.Optional[exp.Expression]] = [ 6417 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6418 ] 6419 if self._match(TokenType.COMMA): 6420 args.extend(self._parse_csv(self._parse_assignment)) 6421 else: 6422 args = self._parse_csv(self._parse_assignment) # type: ignore 6423 6424 if self._match_text_seq("ON", "OVERFLOW"): 6425 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6426 if self._match_text_seq("ERROR"): 6427 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6428 else: 6429 self._match_text_seq("TRUNCATE") 6430 on_overflow = self.expression( 6431 exp.OverflowTruncateBehavior, 6432 this=self._parse_string(), 6433 with_count=( 6434 self._match_text_seq("WITH", "COUNT") 6435 or not self._match_text_seq("WITHOUT", "COUNT") 6436 ), 6437 ) 6438 else: 6439 on_overflow = None 6440 6441 index = self._index 6442 if not self._match(TokenType.R_PAREN) and args: 6443 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6444 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6445 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6446 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6447 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6448 6449 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6450 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6451 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
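# ---------------------------------------------------------------------------
# Illustrative aside: the canonicalization described above is what lets a
# WITHIN GROUP aggregation transpile to dialects that lack it. The SQL and
# dialect pair are assumptions, and the exact output string may vary by
# sqlglot version.
#
#   >>> import sqlglot
#   >>> sqlglot.transpile(
#   ...     "SELECT LISTAGG(name, ', ') WITHIN GROUP (ORDER BY name) FROM t",
#   ...     read="oracle",
#   ...     write="mysql",
#   ... )[0]
#   "SELECT GROUP_CONCAT(name ORDER BY name SEPARATOR ', ') FROM t"
# ---------------------------------------------------------------------------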
6452 if not self._match_text_seq("WITHIN", "GROUP"): 6453 self._retreat(index) 6454 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6455 6456 # The corresponding match_r_paren will be called in parse_function (caller) 6457 self._match_l_paren() 6458 6459 return self.expression( 6460 exp.GroupConcat, 6461 this=self._parse_order(this=seq_get(args, 0)), 6462 separator=seq_get(args, 1), 6463 on_overflow=on_overflow, 6464 ) 6465 6466 def _parse_convert( 6467 self, strict: bool, safe: t.Optional[bool] = None 6468 ) -> t.Optional[exp.Expression]: 6469 this = self._parse_bitwise() 6470 6471 if self._match(TokenType.USING): 6472 to: t.Optional[exp.Expression] = self.expression( 6473 exp.CharacterSet, this=self._parse_var() 6474 ) 6475 elif self._match(TokenType.COMMA): 6476 to = self._parse_types() 6477 else: 6478 to = None 6479 6480 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 6481 6482 def _parse_xml_table(self) -> exp.XMLTable: 6483 namespaces = None 6484 passing = None 6485 columns = None 6486 6487 if self._match_text_seq("XMLNAMESPACES", "("): 6488 namespaces = self._parse_xml_namespace() 6489 self._match_text_seq(")", ",") 6490 6491 this = self._parse_string() 6492 6493 if self._match_text_seq("PASSING"): 6494 # The BY VALUE keywords are optional and are provided for semantic clarity 6495 self._match_text_seq("BY", "VALUE") 6496 passing = self._parse_csv(self._parse_column) 6497 6498 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6499 6500 if self._match_text_seq("COLUMNS"): 6501 columns = self._parse_csv(self._parse_field_def) 6502 6503 return self.expression( 6504 exp.XMLTable, 6505 this=this, 6506 namespaces=namespaces, 6507 passing=passing, 6508 columns=columns, 6509 by_ref=by_ref, 6510 ) 6511 6512 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6513 namespaces = [] 6514 6515 while True: 6516 if self._match(TokenType.DEFAULT): 6517 uri = self._parse_string() 6518 else: 6519 uri = self._parse_alias(self._parse_string()) 6520 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6521 if not self._match(TokenType.COMMA): 6522 break 6523 6524 return namespaces 6525 6526 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 6527 """ 6528 There are generally two variants of the DECODE function: 6529 6530 - DECODE(bin, charset) 6531 - DECODE(expression, search, result [, search, result] ... [, default]) 6532 6533 The second variant will always be parsed into a CASE expression. Note that NULL 6534 needs special treatment, since we need to explicitly check for it with `IS NULL`, 6535 instead of relying on pattern matching. 
6536 """ 6537 args = self._parse_csv(self._parse_assignment) 6538 6539 if len(args) < 3: 6540 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6541 6542 expression, *expressions = args 6543 if not expression: 6544 return None 6545 6546 ifs = [] 6547 for search, result in zip(expressions[::2], expressions[1::2]): 6548 if not search or not result: 6549 return None 6550 6551 if isinstance(search, exp.Literal): 6552 ifs.append( 6553 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 6554 ) 6555 elif isinstance(search, exp.Null): 6556 ifs.append( 6557 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 6558 ) 6559 else: 6560 cond = exp.or_( 6561 exp.EQ(this=expression.copy(), expression=search), 6562 exp.and_( 6563 exp.Is(this=expression.copy(), expression=exp.Null()), 6564 exp.Is(this=search.copy(), expression=exp.Null()), 6565 copy=False, 6566 ), 6567 copy=False, 6568 ) 6569 ifs.append(exp.If(this=cond, true=result)) 6570 6571 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6572 6573 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6574 self._match_text_seq("KEY") 6575 key = self._parse_column() 6576 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6577 self._match_text_seq("VALUE") 6578 value = self._parse_bitwise() 6579 6580 if not key and not value: 6581 return None 6582 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6583 6584 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6585 if not this or not self._match_text_seq("FORMAT", "JSON"): 6586 return this 6587 6588 return self.expression(exp.FormatJson, this=this) 6589 6590 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6591 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6592 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6593 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6594 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6595 else: 6596 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6597 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6598 6599 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6600 6601 if not empty and not error and not null: 6602 return None 6603 6604 return self.expression( 6605 exp.OnCondition, 6606 empty=empty, 6607 error=error, 6608 null=null, 6609 ) 6610 6611 def _parse_on_handling( 6612 self, on: str, *values: str 6613 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6614 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6615 for value in values: 6616 if self._match_text_seq(value, "ON", on): 6617 return f"{value} ON {on}" 6618 6619 index = self._index 6620 if self._match(TokenType.DEFAULT): 6621 default_value = self._parse_bitwise() 6622 if self._match_text_seq("ON", on): 6623 return default_value 6624 6625 self._retreat(index) 6626 6627 return None 6628 6629 @t.overload 6630 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6631 6632 @t.overload 6633 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
6634 6635 def _parse_json_object(self, agg=False): 6636 star = self._parse_star() 6637 expressions = ( 6638 [star] 6639 if star 6640 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6641 ) 6642 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6643 6644 unique_keys = None 6645 if self._match_text_seq("WITH", "UNIQUE"): 6646 unique_keys = True 6647 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6648 unique_keys = False 6649 6650 self._match_text_seq("KEYS") 6651 6652 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6653 self._parse_type() 6654 ) 6655 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6656 6657 return self.expression( 6658 exp.JSONObjectAgg if agg else exp.JSONObject, 6659 expressions=expressions, 6660 null_handling=null_handling, 6661 unique_keys=unique_keys, 6662 return_type=return_type, 6663 encoding=encoding, 6664 ) 6665 6666 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6667 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6668 if not self._match_text_seq("NESTED"): 6669 this = self._parse_id_var() 6670 kind = self._parse_types(allow_identifiers=False) 6671 nested = None 6672 else: 6673 this = None 6674 kind = None 6675 nested = True 6676 6677 path = self._match_text_seq("PATH") and self._parse_string() 6678 nested_schema = nested and self._parse_json_schema() 6679 6680 return self.expression( 6681 exp.JSONColumnDef, 6682 this=this, 6683 kind=kind, 6684 path=path, 6685 nested_schema=nested_schema, 6686 ) 6687 6688 def _parse_json_schema(self) -> exp.JSONSchema: 6689 self._match_text_seq("COLUMNS") 6690 return self.expression( 6691 exp.JSONSchema, 6692 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6693 ) 6694 6695 def _parse_json_table(self) -> exp.JSONTable: 6696 this = self._parse_format_json(self._parse_bitwise()) 6697 path = self._match(TokenType.COMMA) and self._parse_string() 6698 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6699 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6700 schema = self._parse_json_schema() 6701 6702 return exp.JSONTable( 6703 this=this, 6704 schema=schema, 6705 path=path, 6706 error_handling=error_handling, 6707 empty_handling=empty_handling, 6708 ) 6709 6710 def _parse_match_against(self) -> exp.MatchAgainst: 6711 expressions = self._parse_csv(self._parse_column) 6712 6713 self._match_text_seq(")", "AGAINST", "(") 6714 6715 this = self._parse_string() 6716 6717 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6718 modifier = "IN NATURAL LANGUAGE MODE" 6719 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6720 modifier = f"{modifier} WITH QUERY EXPANSION" 6721 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6722 modifier = "IN BOOLEAN MODE" 6723 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6724 modifier = "WITH QUERY EXPANSION" 6725 else: 6726 modifier = None 6727 6728 return self.expression( 6729 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6730 ) 6731 6732 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6733 def _parse_open_json(self) -> exp.OpenJSON: 6734 this = self._parse_bitwise() 6735 path = self._match(TokenType.COMMA) and self._parse_string() 6736 6737 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6738 this = self._parse_field(any_token=True) 6739 kind = self._parse_types() 6740 path = 
self._parse_string() 6741 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6742 6743 return self.expression( 6744 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6745 ) 6746 6747 expressions = None 6748 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6749 self._match_l_paren() 6750 expressions = self._parse_csv(_parse_open_json_column_def) 6751 6752 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6753 6754 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6755 args = self._parse_csv(self._parse_bitwise) 6756 6757 if self._match(TokenType.IN): 6758 return self.expression( 6759 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6760 ) 6761 6762 if haystack_first: 6763 haystack = seq_get(args, 0) 6764 needle = seq_get(args, 1) 6765 else: 6766 haystack = seq_get(args, 1) 6767 needle = seq_get(args, 0) 6768 6769 return self.expression( 6770 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6771 ) 6772 6773 def _parse_predict(self) -> exp.Predict: 6774 self._match_text_seq("MODEL") 6775 this = self._parse_table() 6776 6777 self._match(TokenType.COMMA) 6778 self._match_text_seq("TABLE") 6779 6780 return self.expression( 6781 exp.Predict, 6782 this=this, 6783 expression=self._parse_table(), 6784 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6785 ) 6786 6787 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6788 args = self._parse_csv(self._parse_table) 6789 return exp.JoinHint(this=func_name.upper(), expressions=args) 6790 6791 def _parse_substring(self) -> exp.Substring: 6792 # Postgres supports the form: substring(string [from int] [for int]) 6793 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6794 6795 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6796 6797 if self._match(TokenType.FROM): 6798 args.append(self._parse_bitwise()) 6799 if self._match(TokenType.FOR): 6800 if len(args) == 1: 6801 args.append(exp.Literal.number(1)) 6802 args.append(self._parse_bitwise()) 6803 6804 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6805 6806 def _parse_trim(self) -> exp.Trim: 6807 # https://www.w3resource.com/sql/character-functions/trim.php 6808 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6809 6810 position = None 6811 collation = None 6812 expression = None 6813 6814 if self._match_texts(self.TRIM_TYPES): 6815 position = self._prev.text.upper() 6816 6817 this = self._parse_bitwise() 6818 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6819 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6820 expression = self._parse_bitwise() 6821 6822 if invert_order: 6823 this, expression = expression, this 6824 6825 if self._match(TokenType.COLLATE): 6826 collation = self._parse_bitwise() 6827 6828 return self.expression( 6829 exp.Trim, this=this, position=position, expression=expression, collation=collation 6830 ) 6831 6832 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6833 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6834 6835 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6836 return self._parse_window(self._parse_id_var(), alias=True) 6837 6838 def _parse_respect_or_ignore_nulls( 6839 self, this: t.Optional[exp.Expression] 6840 ) -> t.Optional[exp.Expression]: 6841 if self._match_text_seq("IGNORE", "NULLS"): 
6842 return self.expression(exp.IgnoreNulls, this=this) 6843 if self._match_text_seq("RESPECT", "NULLS"): 6844 return self.expression(exp.RespectNulls, this=this) 6845 return this 6846 6847 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6848 if self._match(TokenType.HAVING): 6849 self._match_texts(("MAX", "MIN")) 6850 max = self._prev.text.upper() != "MIN" 6851 return self.expression( 6852 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6853 ) 6854 6855 return this 6856 6857 def _parse_window( 6858 self, this: t.Optional[exp.Expression], alias: bool = False 6859 ) -> t.Optional[exp.Expression]: 6860 func = this 6861 comments = func.comments if isinstance(func, exp.Expression) else None 6862 6863 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6864 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6865 if self._match_text_seq("WITHIN", "GROUP"): 6866 order = self._parse_wrapped(self._parse_order) 6867 this = self.expression(exp.WithinGroup, this=this, expression=order) 6868 6869 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6870 self._match(TokenType.WHERE) 6871 this = self.expression( 6872 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6873 ) 6874 self._match_r_paren() 6875 6876 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6877 # Some dialects choose to implement and some do not. 6878 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6879 6880 # There is some code above in _parse_lambda that handles 6881 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6882 6883 # The below changes handle 6884 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6885 6886 # Oracle allows both formats 6887 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6888 # and Snowflake chose to do the same for familiarity 6889 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6890 if isinstance(this, exp.AggFunc): 6891 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6892 6893 if ignore_respect and ignore_respect is not this: 6894 ignore_respect.replace(ignore_respect.this) 6895 this = self.expression(ignore_respect.__class__, this=this) 6896 6897 this = self._parse_respect_or_ignore_nulls(this) 6898 6899 # bigquery select from window x AS (partition by ...) 
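# ---------------------------------------------------------------------------
# Illustrative aside: the IGNORE/RESPECT NULLS normalization above moves the
# modifier so it wraps the whole aggregate inside the window node. A minimal
# sketch (SQL and dialect assumed for demonstration):
#
#   >>> import sqlglot
#   >>> from sqlglot import exp
#   >>> w = sqlglot.parse_one(
#   ...     "SELECT FIRST_VALUE(x) IGNORE NULLS OVER (ORDER BY y) FROM t",
#   ...     read="snowflake",
#   ... ).find(exp.Window)
#   >>> isinstance(w.this, exp.IgnoreNulls)
#   True
# ---------------------------------------------------------------------------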
6900 if alias: 6901 over = None 6902 self._match(TokenType.ALIAS) 6903 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6904 return this 6905 else: 6906 over = self._prev.text.upper() 6907 6908 if comments and isinstance(func, exp.Expression): 6909 func.pop_comments() 6910 6911 if not self._match(TokenType.L_PAREN): 6912 return self.expression( 6913 exp.Window, 6914 comments=comments, 6915 this=this, 6916 alias=self._parse_id_var(False), 6917 over=over, 6918 ) 6919 6920 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6921 6922 first = self._match(TokenType.FIRST) 6923 if self._match_text_seq("LAST"): 6924 first = False 6925 6926 partition, order = self._parse_partition_and_order() 6927 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6928 6929 if kind: 6930 self._match(TokenType.BETWEEN) 6931 start = self._parse_window_spec() 6932 self._match(TokenType.AND) 6933 end = self._parse_window_spec() 6934 exclude = ( 6935 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 6936 if self._match_text_seq("EXCLUDE") 6937 else None 6938 ) 6939 6940 spec = self.expression( 6941 exp.WindowSpec, 6942 kind=kind, 6943 start=start["value"], 6944 start_side=start["side"], 6945 end=end["value"], 6946 end_side=end["side"], 6947 exclude=exclude, 6948 ) 6949 else: 6950 spec = None 6951 6952 self._match_r_paren() 6953 6954 window = self.expression( 6955 exp.Window, 6956 comments=comments, 6957 this=this, 6958 partition_by=partition, 6959 order=order, 6960 spec=spec, 6961 alias=window_alias, 6962 over=over, 6963 first=first, 6964 ) 6965 6966 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6967 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6968 return self._parse_window(window, alias=alias) 6969 6970 return window 6971 6972 def _parse_partition_and_order( 6973 self, 6974 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6975 return self._parse_partition_by(), self._parse_order() 6976 6977 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6978 self._match(TokenType.BETWEEN) 6979 6980 return { 6981 "value": ( 6982 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6983 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6984 or self._parse_bitwise() 6985 ), 6986 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6987 } 6988 6989 def _parse_alias( 6990 self, this: t.Optional[exp.Expression], explicit: bool = False 6991 ) -> t.Optional[exp.Expression]: 6992 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 6993 # so this section tries to parse the clause version and if it fails, it treats the token 6994 # as an identifier (alias) 6995 if self._can_parse_limit_or_offset(): 6996 return this 6997 6998 any_token = self._match(TokenType.ALIAS) 6999 comments = self._prev_comments or [] 7000 7001 if explicit and not any_token: 7002 return this 7003 7004 if self._match(TokenType.L_PAREN): 7005 aliases = self.expression( 7006 exp.Aliases, 7007 comments=comments, 7008 this=this, 7009 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 7010 ) 7011 self._match_r_paren(aliases) 7012 return aliases 7013 7014 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 7015 self.STRING_ALIASES and self._parse_string_as_identifier() 7016 ) 7017 7018 if alias: 7019 comments.extend(alias.pop_comments()) 7020 this = self.expression(exp.Alias, comments=comments, this=this, 
alias=alias) 7021 column = this.this 7022 7023 # Moves the comment next to the alias in `expr /* comment */ AS alias` 7024 if not this.comments and column and column.comments: 7025 this.comments = column.pop_comments() 7026 7027 return this 7028 7029 def _parse_id_var( 7030 self, 7031 any_token: bool = True, 7032 tokens: t.Optional[t.Collection[TokenType]] = None, 7033 ) -> t.Optional[exp.Expression]: 7034 expression = self._parse_identifier() 7035 if not expression and ( 7036 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 7037 ): 7038 quoted = self._prev.token_type == TokenType.STRING 7039 expression = self._identifier_expression(quoted=quoted) 7040 7041 return expression 7042 7043 def _parse_string(self) -> t.Optional[exp.Expression]: 7044 if self._match_set(self.STRING_PARSERS): 7045 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 7046 return self._parse_placeholder() 7047 7048 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 7049 output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 7050 if output: 7051 output.update_positions(self._prev) 7052 return output 7053 7054 def _parse_number(self) -> t.Optional[exp.Expression]: 7055 if self._match_set(self.NUMERIC_PARSERS): 7056 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 7057 return self._parse_placeholder() 7058 7059 def _parse_identifier(self) -> t.Optional[exp.Expression]: 7060 if self._match(TokenType.IDENTIFIER): 7061 return self._identifier_expression(quoted=True) 7062 return self._parse_placeholder() 7063 7064 def _parse_var( 7065 self, 7066 any_token: bool = False, 7067 tokens: t.Optional[t.Collection[TokenType]] = None, 7068 upper: bool = False, 7069 ) -> t.Optional[exp.Expression]: 7070 if ( 7071 (any_token and self._advance_any()) 7072 or self._match(TokenType.VAR) 7073 or (self._match_set(tokens) if tokens else False) 7074 ): 7075 return self.expression( 7076 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7077 ) 7078 return self._parse_placeholder() 7079 7080 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 7081 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7082 self._advance() 7083 return self._prev 7084 return None 7085 7086 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7087 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7088 7089 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7090 return self._parse_primary() or self._parse_var(any_token=True) 7091 7092 def _parse_null(self) -> t.Optional[exp.Expression]: 7093 if self._match_set(self.NULL_TOKENS): 7094 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7095 return self._parse_placeholder() 7096 7097 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7098 if self._match(TokenType.TRUE): 7099 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7100 if self._match(TokenType.FALSE): 7101 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7102 return self._parse_placeholder() 7103 7104 def _parse_star(self) -> t.Optional[exp.Expression]: 7105 if self._match(TokenType.STAR): 7106 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7107 return self._parse_placeholder() 7108 7109 def _parse_parameter(self) -> exp.Parameter: 7110 this = self._parse_identifier() or self._parse_primary_or_var() 7111 return 
self.expression(exp.Parameter, this=this) 7112 7113 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7114 if self._match_set(self.PLACEHOLDER_PARSERS): 7115 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7116 if placeholder: 7117 return placeholder 7118 self._advance(-1) 7119 return None 7120 7121 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7122 if not self._match_texts(keywords): 7123 return None 7124 if self._match(TokenType.L_PAREN, advance=False): 7125 return self._parse_wrapped_csv(self._parse_expression) 7126 7127 expression = self._parse_expression() 7128 return [expression] if expression else None 7129 7130 def _parse_csv( 7131 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7132 ) -> t.List[exp.Expression]: 7133 parse_result = parse_method() 7134 items = [parse_result] if parse_result is not None else [] 7135 7136 while self._match(sep): 7137 self._add_comments(parse_result) 7138 parse_result = parse_method() 7139 if parse_result is not None: 7140 items.append(parse_result) 7141 7142 return items 7143 7144 def _parse_tokens( 7145 self, parse_method: t.Callable, expressions: t.Dict 7146 ) -> t.Optional[exp.Expression]: 7147 this = parse_method() 7148 7149 while self._match_set(expressions): 7150 this = self.expression( 7151 expressions[self._prev.token_type], 7152 this=this, 7153 comments=self._prev_comments, 7154 expression=parse_method(), 7155 ) 7156 7157 return this 7158 7159 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7160 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7161 7162 def _parse_wrapped_csv( 7163 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 7164 ) -> t.List[exp.Expression]: 7165 return self._parse_wrapped( 7166 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7167 ) 7168 7169 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7170 wrapped = self._match(TokenType.L_PAREN) 7171 if not wrapped and not optional: 7172 self.raise_error("Expecting (") 7173 parse_result = parse_method() 7174 if wrapped: 7175 self._match_r_paren() 7176 return parse_result 7177 7178 def _parse_expressions(self) -> t.List[exp.Expression]: 7179 return self._parse_csv(self._parse_expression) 7180 7181 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7182 return self._parse_select() or self._parse_set_operations( 7183 self._parse_alias(self._parse_assignment(), explicit=True) 7184 if alias 7185 else self._parse_assignment() 7186 ) 7187 7188 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7189 return self._parse_query_modifiers( 7190 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7191 ) 7192 7193 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7194 this = None 7195 if self._match_texts(self.TRANSACTION_KIND): 7196 this = self._prev.text 7197 7198 self._match_texts(("TRANSACTION", "WORK")) 7199 7200 modes = [] 7201 while True: 7202 mode = [] 7203 while self._match(TokenType.VAR): 7204 mode.append(self._prev.text) 7205 7206 if mode: 7207 modes.append(" ".join(mode)) 7208 if not self._match(TokenType.COMMA): 7209 break 7210 7211 return self.expression(exp.Transaction, this=this, modes=modes) 7212 7213 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7214 chain = None 7215 savepoint = None 7216 is_rollback = self._prev.token_type == 
TokenType.ROLLBACK 7217 7218 self._match_texts(("TRANSACTION", "WORK")) 7219 7220 if self._match_text_seq("TO"): 7221 self._match_text_seq("SAVEPOINT") 7222 savepoint = self._parse_id_var() 7223 7224 if self._match(TokenType.AND): 7225 chain = not self._match_text_seq("NO") 7226 self._match_text_seq("CHAIN") 7227 7228 if is_rollback: 7229 return self.expression(exp.Rollback, savepoint=savepoint) 7230 7231 return self.expression(exp.Commit, chain=chain) 7232 7233 def _parse_refresh(self) -> exp.Refresh: 7234 self._match(TokenType.TABLE) 7235 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7236 7237 def _parse_add_column(self) -> t.Optional[exp.ColumnDef]: 7238 if not self._prev.text.upper() == "ADD": 7239 return None 7240 7241 start = self._index 7242 self._match(TokenType.COLUMN) 7243 7244 exists_column = self._parse_exists(not_=True) 7245 expression = self._parse_field_def() 7246 7247 if not isinstance(expression, exp.ColumnDef): 7248 self._retreat(start) 7249 return None 7250 7251 expression.set("exists", exists_column) 7252 7253 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7254 if self._match_texts(("FIRST", "AFTER")): 7255 position = self._prev.text 7256 column_position = self.expression( 7257 exp.ColumnPosition, this=self._parse_column(), position=position 7258 ) 7259 expression.set("position", column_position) 7260 7261 return expression 7262 7263 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7264 drop = self._match(TokenType.DROP) and self._parse_drop() 7265 if drop and not isinstance(drop, exp.Command): 7266 drop.set("kind", drop.args.get("kind", "COLUMN")) 7267 return drop 7268 7269 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7270 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7271 return self.expression( 7272 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7273 ) 7274 7275 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7276 def _parse_add_alteration() -> t.Optional[exp.Expression]: 7277 self._match_text_seq("ADD") 7278 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7279 return self.expression( 7280 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7281 ) 7282 7283 column_def = self._parse_add_column() 7284 if isinstance(column_def, exp.ColumnDef): 7285 return column_def 7286 7287 exists = self._parse_exists(not_=True) 7288 if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False): 7289 return self.expression( 7290 exp.AddPartition, exists=exists, this=self._parse_field(any_token=True) 7291 ) 7292 7293 return None 7294 7295 if not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN or self._match_text_seq( 7296 "COLUMNS" 7297 ): 7298 schema = self._parse_schema() 7299 7300 return ensure_list(schema) if schema else self._parse_csv(self._parse_field_def) 7301 7302 return self._parse_csv(_parse_add_alteration) 7303 7304 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7305 if self._match_texts(self.ALTER_ALTER_PARSERS): 7306 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7307 7308 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7309 # keyword after ALTER we default to parsing this statement 7310 self._match(TokenType.COLUMN) 7311 column = self._parse_field(any_token=True) 7312 7313 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7314 
return self.expression(exp.AlterColumn, this=column, drop=True) 7315 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7316 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7317 if self._match(TokenType.COMMENT): 7318 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7319 if self._match_text_seq("DROP", "NOT", "NULL"): 7320 return self.expression( 7321 exp.AlterColumn, 7322 this=column, 7323 drop=True, 7324 allow_null=True, 7325 ) 7326 if self._match_text_seq("SET", "NOT", "NULL"): 7327 return self.expression( 7328 exp.AlterColumn, 7329 this=column, 7330 allow_null=False, 7331 ) 7332 7333 if self._match_text_seq("SET", "VISIBLE"): 7334 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7335 if self._match_text_seq("SET", "INVISIBLE"): 7336 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7337 7338 self._match_text_seq("SET", "DATA") 7339 self._match_text_seq("TYPE") 7340 return self.expression( 7341 exp.AlterColumn, 7342 this=column, 7343 dtype=self._parse_types(), 7344 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7345 using=self._match(TokenType.USING) and self._parse_assignment(), 7346 ) 7347 7348 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7349 if self._match_texts(("ALL", "EVEN", "AUTO")): 7350 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7351 7352 self._match_text_seq("KEY", "DISTKEY") 7353 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7354 7355 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7356 if compound: 7357 self._match_text_seq("SORTKEY") 7358 7359 if self._match(TokenType.L_PAREN, advance=False): 7360 return self.expression( 7361 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7362 ) 7363 7364 self._match_texts(("AUTO", "NONE")) 7365 return self.expression( 7366 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7367 ) 7368 7369 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7370 index = self._index - 1 7371 7372 partition_exists = self._parse_exists() 7373 if self._match(TokenType.PARTITION, advance=False): 7374 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7375 7376 self._retreat(index) 7377 return self._parse_csv(self._parse_drop_column) 7378 7379 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7380 if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN: 7381 exists = self._parse_exists() 7382 old_column = self._parse_column() 7383 to = self._match_text_seq("TO") 7384 new_column = self._parse_column() 7385 7386 if old_column is None or to is None or new_column is None: 7387 return None 7388 7389 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7390 7391 self._match_text_seq("TO") 7392 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7393 7394 def _parse_alter_table_set(self) -> exp.AlterSet: 7395 alter_set = self.expression(exp.AlterSet) 7396 7397 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7398 "TABLE", "PROPERTIES" 7399 ): 7400 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7401 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7402 alter_set.set("expressions", [self._parse_assignment()]) 7403 elif self._match_texts(("LOGGED", 
"UNLOGGED")): 7404 alter_set.set("option", exp.var(self._prev.text.upper())) 7405 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7406 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7407 elif self._match_text_seq("LOCATION"): 7408 alter_set.set("location", self._parse_field()) 7409 elif self._match_text_seq("ACCESS", "METHOD"): 7410 alter_set.set("access_method", self._parse_field()) 7411 elif self._match_text_seq("TABLESPACE"): 7412 alter_set.set("tablespace", self._parse_field()) 7413 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7414 alter_set.set("file_format", [self._parse_field()]) 7415 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7416 alter_set.set("file_format", self._parse_wrapped_options()) 7417 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7418 alter_set.set("copy_options", self._parse_wrapped_options()) 7419 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7420 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7421 else: 7422 if self._match_text_seq("SERDE"): 7423 alter_set.set("serde", self._parse_field()) 7424 7425 properties = self._parse_wrapped(self._parse_properties, optional=True) 7426 alter_set.set("expressions", [properties]) 7427 7428 return alter_set 7429 7430 def _parse_alter(self) -> exp.Alter | exp.Command: 7431 start = self._prev 7432 7433 alter_token = self._match_set(self.ALTERABLES) and self._prev 7434 if not alter_token: 7435 return self._parse_as_command(start) 7436 7437 exists = self._parse_exists() 7438 only = self._match_text_seq("ONLY") 7439 this = self._parse_table(schema=True) 7440 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7441 7442 if self._next: 7443 self._advance() 7444 7445 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7446 if parser: 7447 actions = ensure_list(parser(self)) 7448 not_valid = self._match_text_seq("NOT", "VALID") 7449 options = self._parse_csv(self._parse_property) 7450 7451 if not self._curr and actions: 7452 return self.expression( 7453 exp.Alter, 7454 this=this, 7455 kind=alter_token.text.upper(), 7456 exists=exists, 7457 actions=actions, 7458 only=only, 7459 options=options, 7460 cluster=cluster, 7461 not_valid=not_valid, 7462 ) 7463 7464 return self._parse_as_command(start) 7465 7466 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7467 start = self._prev 7468 # https://duckdb.org/docs/sql/statements/analyze 7469 if not self._curr: 7470 return self.expression(exp.Analyze) 7471 7472 options = [] 7473 while self._match_texts(self.ANALYZE_STYLES): 7474 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7475 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7476 else: 7477 options.append(self._prev.text.upper()) 7478 7479 this: t.Optional[exp.Expression] = None 7480 inner_expression: t.Optional[exp.Expression] = None 7481 7482 kind = self._curr and self._curr.text.upper() 7483 7484 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7485 this = self._parse_table_parts() 7486 elif self._match_text_seq("TABLES"): 7487 if self._match_set((TokenType.FROM, TokenType.IN)): 7488 kind = f"{kind} {self._prev.text.upper()}" 7489 this = self._parse_table(schema=True, is_db_reference=True) 7490 elif self._match_text_seq("DATABASE"): 7491 this = self._parse_table(schema=True, is_db_reference=True) 7492 elif self._match_text_seq("CLUSTER"): 7493 this = self._parse_table() 7494 # Try matching inner expr keywords before 
fallback to parse table. 7495 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7496 kind = None 7497 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7498 else: 7499 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7500 kind = None 7501 this = self._parse_table_parts() 7502 7503 partition = self._try_parse(self._parse_partition) 7504 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7505 return self._parse_as_command(start) 7506 7507 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7508 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7509 "WITH", "ASYNC", "MODE" 7510 ): 7511 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 7512 else: 7513 mode = None 7514 7515 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7516 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7517 7518 properties = self._parse_properties() 7519 return self.expression( 7520 exp.Analyze, 7521 kind=kind, 7522 this=this, 7523 mode=mode, 7524 partition=partition, 7525 properties=properties, 7526 expression=inner_expression, 7527 options=options, 7528 ) 7529 7530 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7531 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7532 this = None 7533 kind = self._prev.text.upper() 7534 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7535 expressions = [] 7536 7537 if not self._match_text_seq("STATISTICS"): 7538 self.raise_error("Expecting token STATISTICS") 7539 7540 if self._match_text_seq("NOSCAN"): 7541 this = "NOSCAN" 7542 elif self._match(TokenType.FOR): 7543 if self._match_text_seq("ALL", "COLUMNS"): 7544 this = "FOR ALL COLUMNS" 7545 if self._match_texts("COLUMNS"): 7546 this = "FOR COLUMNS" 7547 expressions = self._parse_csv(self._parse_column_reference) 7548 elif self._match_text_seq("SAMPLE"): 7549 sample = self._parse_number() 7550 expressions = [ 7551 self.expression( 7552 exp.AnalyzeSample, 7553 sample=sample, 7554 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7555 ) 7556 ] 7557 7558 return self.expression( 7559 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7560 ) 7561 7562 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7563 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7564 kind = None 7565 this = None 7566 expression: t.Optional[exp.Expression] = None 7567 if self._match_text_seq("REF", "UPDATE"): 7568 kind = "REF" 7569 this = "UPDATE" 7570 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7571 this = "UPDATE SET DANGLING TO NULL" 7572 elif self._match_text_seq("STRUCTURE"): 7573 kind = "STRUCTURE" 7574 if self._match_text_seq("CASCADE", "FAST"): 7575 this = "CASCADE FAST" 7576 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7577 ("ONLINE", "OFFLINE") 7578 ): 7579 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7580 expression = self._parse_into() 7581 7582 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7583 7584 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7585 this = self._prev.text.upper() 7586 if self._match_text_seq("COLUMNS"): 7587 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7588 return None 7589 7590 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 
7591 kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7592 if self._match_text_seq("STATISTICS"): 7593 return self.expression(exp.AnalyzeDelete, kind=kind) 7594 return None 7595 7596 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7597 if self._match_text_seq("CHAINED", "ROWS"): 7598 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7599 return None 7600 7601 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7602 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7603 this = self._prev.text.upper() 7604 expression: t.Optional[exp.Expression] = None 7605 expressions = [] 7606 update_options = None 7607 7608 if self._match_text_seq("HISTOGRAM", "ON"): 7609 expressions = self._parse_csv(self._parse_column_reference) 7610 with_expressions = [] 7611 while self._match(TokenType.WITH): 7612 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7613 if self._match_texts(("SYNC", "ASYNC")): 7614 if self._match_text_seq("MODE", advance=False): 7615 with_expressions.append(f"{self._prev.text.upper()} MODE") 7616 self._advance() 7617 else: 7618 buckets = self._parse_number() 7619 if self._match_text_seq("BUCKETS"): 7620 with_expressions.append(f"{buckets} BUCKETS") 7621 if with_expressions: 7622 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7623 7624 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7625 TokenType.UPDATE, advance=False 7626 ): 7627 update_options = self._prev.text.upper() 7628 self._advance() 7629 elif self._match_text_seq("USING", "DATA"): 7630 expression = self.expression(exp.UsingData, this=self._parse_string()) 7631 7632 return self.expression( 7633 exp.AnalyzeHistogram, 7634 this=this, 7635 expressions=expressions, 7636 expression=expression, 7637 update_options=update_options, 7638 ) 7639 7640 def _parse_merge(self) -> exp.Merge: 7641 self._match(TokenType.INTO) 7642 target = self._parse_table() 7643 7644 if target and self._match(TokenType.ALIAS, advance=False): 7645 target.set("alias", self._parse_table_alias()) 7646 7647 self._match(TokenType.USING) 7648 using = self._parse_table() 7649 7650 self._match(TokenType.ON) 7651 on = self._parse_assignment() 7652 7653 return self.expression( 7654 exp.Merge, 7655 this=target, 7656 using=using, 7657 on=on, 7658 whens=self._parse_when_matched(), 7659 returning=self._parse_returning(), 7660 ) 7661 7662 def _parse_when_matched(self) -> exp.Whens: 7663 whens = [] 7664 7665 while self._match(TokenType.WHEN): 7666 matched = not self._match(TokenType.NOT) 7667 self._match_text_seq("MATCHED") 7668 source = ( 7669 False 7670 if self._match_text_seq("BY", "TARGET") 7671 else self._match_text_seq("BY", "SOURCE") 7672 ) 7673 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7674 7675 self._match(TokenType.THEN) 7676 7677 if self._match(TokenType.INSERT): 7678 this = self._parse_star() 7679 if this: 7680 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7681 else: 7682 then = self.expression( 7683 exp.Insert, 7684 this=exp.var("ROW") 7685 if self._match_text_seq("ROW") 7686 else self._parse_value(values=False), 7687 expression=self._match_text_seq("VALUES") and self._parse_value(), 7688 ) 7689 elif self._match(TokenType.UPDATE): 7690 expressions = self._parse_star() 7691 if expressions: 7692 then = self.expression(exp.Update, expressions=expressions) 7693 else: 7694 then = self.expression( 7695 exp.Update, 7696 
expressions=self._match(TokenType.SET) 7697 and self._parse_csv(self._parse_equality), 7698 ) 7699 elif self._match(TokenType.DELETE): 7700 then = self.expression(exp.Var, this=self._prev.text) 7701 else: 7702 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7703 7704 whens.append( 7705 self.expression( 7706 exp.When, 7707 matched=matched, 7708 source=source, 7709 condition=condition, 7710 then=then, 7711 ) 7712 ) 7713 return self.expression(exp.Whens, expressions=whens) 7714 7715 def _parse_show(self) -> t.Optional[exp.Expression]: 7716 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7717 if parser: 7718 return parser(self) 7719 return self._parse_as_command(self._prev) 7720 7721 def _parse_set_item_assignment( 7722 self, kind: t.Optional[str] = None 7723 ) -> t.Optional[exp.Expression]: 7724 index = self._index 7725 7726 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7727 return self._parse_set_transaction(global_=kind == "GLOBAL") 7728 7729 left = self._parse_primary() or self._parse_column() 7730 assignment_delimiter = self._match_texts(("=", "TO")) 7731 7732 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7733 self._retreat(index) 7734 return None 7735 7736 right = self._parse_statement() or self._parse_id_var() 7737 if isinstance(right, (exp.Column, exp.Identifier)): 7738 right = exp.var(right.name) 7739 7740 this = self.expression(exp.EQ, this=left, expression=right) 7741 return self.expression(exp.SetItem, this=this, kind=kind) 7742 7743 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7744 self._match_text_seq("TRANSACTION") 7745 characteristics = self._parse_csv( 7746 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7747 ) 7748 return self.expression( 7749 exp.SetItem, 7750 expressions=characteristics, 7751 kind="TRANSACTION", 7752 **{"global": global_}, # type: ignore 7753 ) 7754 7755 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7756 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7757 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7758 7759 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7760 index = self._index 7761 set_ = self.expression( 7762 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7763 ) 7764 7765 if self._curr: 7766 self._retreat(index) 7767 return self._parse_as_command(self._prev) 7768 7769 return set_ 7770 7771 def _parse_var_from_options( 7772 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7773 ) -> t.Optional[exp.Var]: 7774 start = self._curr 7775 if not start: 7776 return None 7777 7778 option = start.text.upper() 7779 continuations = options.get(option) 7780 7781 index = self._index 7782 self._advance() 7783 for keywords in continuations or []: 7784 if isinstance(keywords, str): 7785 keywords = (keywords,) 7786 7787 if self._match_text_seq(*keywords): 7788 option = f"{option} {' '.join(keywords)}" 7789 break 7790 else: 7791 if continuations or continuations is None: 7792 if raise_unmatched: 7793 self.raise_error(f"Unknown option {option}") 7794 7795 self._retreat(index) 7796 return None 7797 7798 return exp.var(option) 7799 7800 def _parse_as_command(self, start: Token) -> exp.Command: 7801 while self._curr: 7802 self._advance() 7803 text = self._find_sql(start, self._prev) 7804 size = len(start.text) 7805 self._warn_unsupported() 7806 return exp.Command(this=text[:size], 
expression=text[size:]) 7807 7808 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7809 settings = [] 7810 7811 self._match_l_paren() 7812 kind = self._parse_id_var() 7813 7814 if self._match(TokenType.L_PAREN): 7815 while True: 7816 key = self._parse_id_var() 7817 value = self._parse_primary() 7818 if not key and value is None: 7819 break 7820 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7821 self._match(TokenType.R_PAREN) 7822 7823 self._match_r_paren() 7824 7825 return self.expression( 7826 exp.DictProperty, 7827 this=this, 7828 kind=kind.this if kind else None, 7829 settings=settings, 7830 ) 7831 7832 def _parse_dict_range(self, this: str) -> exp.DictRange: 7833 self._match_l_paren() 7834 has_min = self._match_text_seq("MIN") 7835 if has_min: 7836 min = self._parse_var() or self._parse_primary() 7837 self._match_text_seq("MAX") 7838 max = self._parse_var() or self._parse_primary() 7839 else: 7840 max = self._parse_var() or self._parse_primary() 7841 min = exp.Literal.number(0) 7842 self._match_r_paren() 7843 return self.expression(exp.DictRange, this=this, min=min, max=max) 7844 7845 def _parse_comprehension( 7846 self, this: t.Optional[exp.Expression] 7847 ) -> t.Optional[exp.Comprehension]: 7848 index = self._index 7849 expression = self._parse_column() 7850 if not self._match(TokenType.IN): 7851 self._retreat(index - 1) 7852 return None 7853 iterator = self._parse_column() 7854 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7855 return self.expression( 7856 exp.Comprehension, 7857 this=this, 7858 expression=expression, 7859 iterator=iterator, 7860 condition=condition, 7861 ) 7862 7863 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7864 if self._match(TokenType.HEREDOC_STRING): 7865 return self.expression(exp.Heredoc, this=self._prev.text) 7866 7867 if not self._match_text_seq("$"): 7868 return None 7869 7870 tags = ["$"] 7871 tag_text = None 7872 7873 if self._is_connected(): 7874 self._advance() 7875 tags.append(self._prev.text.upper()) 7876 else: 7877 self.raise_error("No closing $ found") 7878 7879 if tags[-1] != "$": 7880 if self._is_connected() and self._match_text_seq("$"): 7881 tag_text = tags[-1] 7882 tags.append("$") 7883 else: 7884 self.raise_error("No closing $ found") 7885 7886 heredoc_start = self._curr 7887 7888 while self._curr: 7889 if self._match_text_seq(*tags, advance=False): 7890 this = self._find_sql(heredoc_start, self._prev) 7891 self._advance(len(tags)) 7892 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7893 7894 self._advance() 7895 7896 self.raise_error(f"No closing {''.join(tags)} found") 7897 return None 7898 7899 def _find_parser( 7900 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7901 ) -> t.Optional[t.Callable]: 7902 if not self._curr: 7903 return None 7904 7905 index = self._index 7906 this = [] 7907 while True: 7908 # The current token might be multiple words 7909 curr = self._curr.text.upper() 7910 key = curr.split(" ") 7911 this.append(curr) 7912 7913 self._advance() 7914 result, trie = in_trie(trie, key) 7915 if result == TrieResult.FAILED: 7916 break 7917 7918 if result == TrieResult.EXISTS: 7919 subparser = parsers[" ".join(this)] 7920 return subparser 7921 7922 self._retreat(index) 7923 return None 7924 7925 def _match(self, token_type, advance=True, expression=None): 7926 if not self._curr: 7927 return None 7928 7929 if self._curr.token_type == token_type: 7930 if advance: 7931 self._advance() 7932 self._add_comments(expression) 7933 return 
True 7934 7935 return None 7936 7937 def _match_set(self, types, advance=True): 7938 if not self._curr: 7939 return None 7940 7941 if self._curr.token_type in types: 7942 if advance: 7943 self._advance() 7944 return True 7945 7946 return None 7947 7948 def _match_pair(self, token_type_a, token_type_b, advance=True): 7949 if not self._curr or not self._next: 7950 return None 7951 7952 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7953 if advance: 7954 self._advance(2) 7955 return True 7956 7957 return None 7958 7959 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7960 if not self._match(TokenType.L_PAREN, expression=expression): 7961 self.raise_error("Expecting (") 7962 7963 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7964 if not self._match(TokenType.R_PAREN, expression=expression): 7965 self.raise_error("Expecting )") 7966 7967 def _match_texts(self, texts, advance=True): 7968 if ( 7969 self._curr 7970 and self._curr.token_type != TokenType.STRING 7971 and self._curr.text.upper() in texts 7972 ): 7973 if advance: 7974 self._advance() 7975 return True 7976 return None 7977 7978 def _match_text_seq(self, *texts, advance=True): 7979 index = self._index 7980 for text in texts: 7981 if ( 7982 self._curr 7983 and self._curr.token_type != TokenType.STRING 7984 and self._curr.text.upper() == text 7985 ): 7986 self._advance() 7987 else: 7988 self._retreat(index) 7989 return None 7990 7991 if not advance: 7992 self._retreat(index) 7993 7994 return True 7995 7996 def _replace_lambda( 7997 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7998 ) -> t.Optional[exp.Expression]: 7999 if not node: 8000 return node 8001 8002 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 8003 8004 for column in node.find_all(exp.Column): 8005 typ = lambda_types.get(column.parts[0].name) 8006 if typ is not None: 8007 dot_or_id = column.to_dot() if column.table else column.this 8008 8009 if typ: 8010 dot_or_id = self.expression( 8011 exp.Cast, 8012 this=dot_or_id, 8013 to=typ, 8014 ) 8015 8016 parent = column.parent 8017 8018 while isinstance(parent, exp.Dot): 8019 if not isinstance(parent.parent, exp.Dot): 8020 parent.replace(dot_or_id) 8021 break 8022 parent = parent.parent 8023 else: 8024 if column is node: 8025 node = dot_or_id 8026 else: 8027 column.replace(dot_or_id) 8028 return node 8029 8030 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 8031 start = self._prev 8032 8033 # Not to be confused with TRUNCATE(number, decimals) function call 8034 if self._match(TokenType.L_PAREN): 8035 self._retreat(self._index - 2) 8036 return self._parse_function() 8037 8038 # Clickhouse supports TRUNCATE DATABASE as well 8039 is_database = self._match(TokenType.DATABASE) 8040 8041 self._match(TokenType.TABLE) 8042 8043 exists = self._parse_exists(not_=False) 8044 8045 expressions = self._parse_csv( 8046 lambda: self._parse_table(schema=True, is_db_reference=is_database) 8047 ) 8048 8049 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8050 8051 if self._match_text_seq("RESTART", "IDENTITY"): 8052 identity = "RESTART" 8053 elif self._match_text_seq("CONTINUE", "IDENTITY"): 8054 identity = "CONTINUE" 8055 else: 8056 identity = None 8057 8058 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8059 option = self._prev.text 8060 else: 8061 option = None 8062 8063 partition = self._parse_partition() 
8064 8065 # Fallback case 8066 if self._curr: 8067 return self._parse_as_command(start) 8068 8069 return self.expression( 8070 exp.TruncateTable, 8071 expressions=expressions, 8072 is_database=is_database, 8073 exists=exists, 8074 cluster=cluster, 8075 identity=identity, 8076 option=option, 8077 partition=partition, 8078 ) 8079 8080 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8081 this = self._parse_ordered(self._parse_opclass) 8082 8083 if not self._match(TokenType.WITH): 8084 return this 8085 8086 op = self._parse_var(any_token=True) 8087 8088 return self.expression(exp.WithOperator, this=this, op=op) 8089 8090 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8091 self._match(TokenType.EQ) 8092 self._match(TokenType.L_PAREN) 8093 8094 opts: t.List[t.Optional[exp.Expression]] = [] 8095 option: exp.Expression | None 8096 while self._curr and not self._match(TokenType.R_PAREN): 8097 if self._match_text_seq("FORMAT_NAME", "="): 8098 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8099 option = self._parse_format_name() 8100 else: 8101 option = self._parse_property() 8102 8103 if option is None: 8104 self.raise_error("Unable to parse option") 8105 break 8106 8107 opts.append(option) 8108 8109 return opts 8110 8111 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8112 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8113 8114 options = [] 8115 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8116 option = self._parse_var(any_token=True) 8117 prev = self._prev.text.upper() 8118 8119 # Different dialects might separate options and values by white space, "=" and "AS" 8120 self._match(TokenType.EQ) 8121 self._match(TokenType.ALIAS) 8122 8123 param = self.expression(exp.CopyParameter, this=option) 8124 8125 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8126 TokenType.L_PAREN, advance=False 8127 ): 8128 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8129 param.set("expressions", self._parse_wrapped_options()) 8130 elif prev == "FILE_FORMAT": 8131 # T-SQL's external file format case 8132 param.set("expression", self._parse_field()) 8133 else: 8134 param.set("expression", self._parse_unquoted_field()) 8135 8136 options.append(param) 8137 self._match(sep) 8138 8139 return options 8140 8141 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8142 expr = self.expression(exp.Credentials) 8143 8144 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8145 expr.set("storage", self._parse_field()) 8146 if self._match_text_seq("CREDENTIALS"): 8147 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8148 creds = ( 8149 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8150 ) 8151 expr.set("credentials", creds) 8152 if self._match_text_seq("ENCRYPTION"): 8153 expr.set("encryption", self._parse_wrapped_options()) 8154 if self._match_text_seq("IAM_ROLE"): 8155 expr.set("iam_role", self._parse_field()) 8156 if self._match_text_seq("REGION"): 8157 expr.set("region", self._parse_field()) 8158 8159 return expr 8160 8161 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8162 return self._parse_field() 8163 8164 def _parse_copy(self) -> exp.Copy | exp.Command: 8165 start = self._prev 8166 8167 self._match(TokenType.INTO) 8168 8169 this = ( 8170 self._parse_select(nested=True, parse_subquery_alias=False) 8171 if self._match(TokenType.L_PAREN, advance=False) 8172 else self._parse_table(schema=True) 
8173 ) 8174 8175 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8176 8177 files = self._parse_csv(self._parse_file_location) 8178 credentials = self._parse_credentials() 8179 8180 self._match_text_seq("WITH") 8181 8182 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8183 8184 # Fallback case 8185 if self._curr: 8186 return self._parse_as_command(start) 8187 8188 return self.expression( 8189 exp.Copy, 8190 this=this, 8191 kind=kind, 8192 credentials=credentials, 8193 files=files, 8194 params=params, 8195 ) 8196 8197 def _parse_normalize(self) -> exp.Normalize: 8198 return self.expression( 8199 exp.Normalize, 8200 this=self._parse_bitwise(), 8201 form=self._match(TokenType.COMMA) and self._parse_var(), 8202 ) 8203 8204 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8205 args = self._parse_csv(lambda: self._parse_lambda()) 8206 8207 this = seq_get(args, 0) 8208 decimals = seq_get(args, 1) 8209 8210 return expr_type( 8211 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8212 ) 8213 8214 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8215 star_token = self._prev 8216 8217 if self._match_text_seq("COLUMNS", "(", advance=False): 8218 this = self._parse_function() 8219 if isinstance(this, exp.Columns): 8220 this.set("unpack", True) 8221 return this 8222 8223 return self.expression( 8224 exp.Star, 8225 **{ # type: ignore 8226 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8227 "replace": self._parse_star_op("REPLACE"), 8228 "rename": self._parse_star_op("RENAME"), 8229 }, 8230 ).update_positions(star_token) 8231 8232 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8233 privilege_parts = [] 8234 8235 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8236 # (end of privilege list) or L_PAREN (start of column list) are met 8237 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8238 privilege_parts.append(self._curr.text.upper()) 8239 self._advance() 8240 8241 this = exp.var(" ".join(privilege_parts)) 8242 expressions = ( 8243 self._parse_wrapped_csv(self._parse_column) 8244 if self._match(TokenType.L_PAREN, advance=False) 8245 else None 8246 ) 8247 8248 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8249 8250 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8251 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8252 principal = self._parse_id_var() 8253 8254 if not principal: 8255 return None 8256 8257 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8258 8259 def _parse_grant(self) -> exp.Grant | exp.Command: 8260 start = self._prev 8261 8262 privileges = self._parse_csv(self._parse_grant_privilege) 8263 8264 self._match(TokenType.ON) 8265 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8266 8267 # Attempt to parse the securable e.g. 
MySQL allows names 8268 # such as "foo.*", "*.*" which are not easily parseable yet 8269 securable = self._try_parse(self._parse_table_parts) 8270 8271 if not securable or not self._match_text_seq("TO"): 8272 return self._parse_as_command(start) 8273 8274 principals = self._parse_csv(self._parse_grant_principal) 8275 8276 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8277 8278 if self._curr: 8279 return self._parse_as_command(start) 8280 8281 return self.expression( 8282 exp.Grant, 8283 privileges=privileges, 8284 kind=kind, 8285 securable=securable, 8286 principals=principals, 8287 grant_option=grant_option, 8288 ) 8289 8290 def _parse_overlay(self) -> exp.Overlay: 8291 return self.expression( 8292 exp.Overlay, 8293 **{ # type: ignore 8294 "this": self._parse_bitwise(), 8295 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8296 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8297 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8298 }, 8299 ) 8300 8301 def _parse_format_name(self) -> exp.Property: 8302 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8303 # for FILE_FORMAT = <format_name> 8304 return self.expression( 8305 exp.Property, 8306 this=exp.var("FORMAT_NAME"), 8307 value=self._parse_string() or self._parse_table_parts(), 8308 ) 8309 8310 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8311 args: t.List[exp.Expression] = [] 8312 8313 if self._match(TokenType.DISTINCT): 8314 args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()])) 8315 self._match(TokenType.COMMA) 8316 8317 args.extend(self._parse_csv(self._parse_assignment)) 8318 8319 return self.expression( 8320 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8321 ) 8322 8323 def _identifier_expression( 8324 self, token: t.Optional[Token] = None, **kwargs: t.Any 8325 ) -> exp.Identifier: 8326 token = token or self._prev 8327 expression = self.expression(exp.Identifier, this=token.text, **kwargs) 8328 expression.update_positions(token) 8329 return expression 8330 8331 def _build_pipe_cte(self, query: exp.Query, expressions: t.List[exp.Expression]) -> exp.Query: 8332 if query.selects: 8333 self._pipe_cte_counter += 1 8334 new_cte = f"__tmp{self._pipe_cte_counter}" 8335 8336 # For `exp.Select`, generated CTEs are attached to its `with` 8337 # For `exp.SetOperation`, generated CTEs are attached to the `with` of its LHS, accessed via `this` 8338 with_ = ( 8339 query.args.get("with") 8340 if isinstance(query, exp.Select) 8341 else query.this.args.get("with") 8342 ) 8343 ctes = with_.pop() if with_ else None 8344 8345 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 8346 if ctes: 8347 new_select.set("with", ctes) 8348 8349 return new_select.with_(new_cte, as_=query, copy=False) 8350 8351 return query.select(*expressions, copy=False) 8352 8353 def _parse_pipe_syntax_select(self, query: exp.Query) -> exp.Query: 8354 select = self._parse_select() 8355 if isinstance(select, exp.Select): 8356 return self._build_pipe_cte(query, select.expressions) 8357 8358 return query 8359 8360 def _parse_pipe_syntax_where(self, query: exp.Query) -> exp.Query: 8361 where = self._parse_where() 8362 return query.where(where, copy=False) 8363 8364 def _parse_pipe_syntax_limit(self, query: exp.Query) -> exp.Query: 8365 limit = self._parse_limit() 8366 offset = self._parse_offset() 8367 if limit: 8368 curr_limit = query.args.get("limit", 
limit) 8369 if curr_limit.expression.to_py() >= limit.expression.to_py(): 8370 query.limit(limit, copy=False) 8371 if offset: 8372 curr_offset = query.args.get("offset") 8373 curr_offset = curr_offset.expression.to_py() if curr_offset else 0 8374 query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False) 8375 return query 8376 8377 def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]: 8378 this = self._parse_assignment() 8379 if self._match_text_seq("GROUP", "AND", advance=False): 8380 return this 8381 8382 this = self._parse_alias(this) 8383 8384 if self._match_set((TokenType.ASC, TokenType.DESC), advance=False): 8385 return self._parse_ordered(lambda: this) 8386 8387 return this 8388 8389 def _parse_pipe_syntax_aggregate_group_order_by( 8390 self, query: exp.Query, group_by_exists: bool = True 8391 ) -> exp.Query: 8392 expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields) 8393 aggregates_or_groups, orders = [], [] 8394 for element in expr: 8395 if isinstance(element, exp.Ordered): 8396 this = element.this 8397 if isinstance(this, exp.Alias): 8398 element.set("this", this.args["alias"]) 8399 orders.append(element) 8400 else: 8401 this = element 8402 aggregates_or_groups.append(this) 8403 8404 if group_by_exists and isinstance(query, exp.Select): 8405 query = query.select(*aggregates_or_groups, copy=False).group_by( 8406 *[projection.args.get("alias", projection) for projection in aggregates_or_groups], 8407 copy=False, 8408 ) 8409 else: 8410 query = query.select(*aggregates_or_groups, append=False, copy=False) 8411 8412 if orders: 8413 return query.order_by(*orders, append=False, copy=False) 8414 8415 return query 8416 8417 def _parse_pipe_syntax_aggregate(self, query: exp.Query) -> exp.Query: 8418 self._match_text_seq("AGGREGATE") 8419 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 8420 8421 if self._match(TokenType.GROUP_BY) or ( 8422 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 8423 ): 8424 return self._parse_pipe_syntax_aggregate_group_order_by(query) 8425 8426 return query 8427 8428 def _parse_pipe_syntax_set_operator( 8429 self, query: t.Optional[exp.Query] 8430 ) -> t.Optional[exp.Query]: 8431 first_setop = self.parse_set_operation(this=query) 8432 8433 if not first_setop or not query: 8434 return None 8435 8436 if not query.selects: 8437 query.select("*", copy=False) 8438 8439 this = first_setop.this.pop() 8440 distinct = first_setop.args.pop("distinct") 8441 setops = [first_setop.expression.pop(), *self._parse_expressions()] 8442 8443 if isinstance(first_setop, exp.Union): 8444 query = query.union(*setops, distinct=distinct, copy=False, **first_setop.args) 8445 elif isinstance(first_setop, exp.Except): 8446 query = query.except_(*setops, distinct=distinct, copy=False, **first_setop.args) 8447 else: 8448 query = query.intersect(*setops, distinct=distinct, copy=False, **first_setop.args) 8449 8450 return self._build_pipe_cte( 8451 query, [projection.args.get("alias", projection) for projection in this.expressions] 8452 ) 8453 8454 def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]: 8455 while self._match(TokenType.PIPE_GT): 8456 start = self._curr 8457 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper()) 8458 if not parser: 8459 set_op_query = self._parse_pipe_syntax_set_operator(query) 8460 if not set_op_query: 8461 self._retreat(start) 8462 self.raise_error(f"Unsupported pipe syntax operator: 
'{start.text.upper()}'.") 8463 break 8464 8465 query = set_op_query 8466 else: 8467 query = parser(self, query) 8468 8469 if query and not query.selects: 8470 return query.select("*", copy=False) 8471 8472 return query
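The listing above is easiest to sanity-check from the public API. As a minimal sketch (exact node reprs can differ between sqlglot versions), the IGNORE NULLS normalization performed in _parse_window means both accepted placements of the modifier parse to the same tree:

import sqlglot
from sqlglot import exp

# Both spellings are accepted; _parse_window hoists an inner IgnoreNulls out
# of the aggregate, so the two trees are expected to come out identical.
inside = sqlglot.parse_one("SELECT FIRST_VALUE(x IGNORE NULLS) OVER (ORDER BY y) FROM t")
outside = sqlglot.parse_one("SELECT FIRST_VALUE(x) IGNORE NULLS OVER (ORDER BY y) FROM t")

window = inside.find(exp.Window)
assert isinstance(window.this, exp.IgnoreNulls)  # the wrapper sits above the aggregate
assert inside == outside  # structural equality; comments and positions are ignored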
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
def __init__(
    self,
    error_level: t.Optional[ErrorLevel] = None,
    error_message_context: int = 100,
    max_errors: int = 3,
    dialect: DialectType = None,
):
    from sqlglot.dialects import Dialect

    self.error_level = error_level or ErrorLevel.IMMEDIATE
    self.error_message_context = error_message_context
    self.max_errors = max_errors
    self.dialect = Dialect.get_or_raise(dialect)
    self.reset()
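A minimal sketch of wiring these arguments up by hand; most callers go through sqlglot.parse or sqlglot.parse_one, which construct the Parser internally:

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM t"
parser = Parser(error_level=ErrorLevel.RAISE, error_message_context=50, max_errors=5)

# The tokenizer and parser are normally paired through the same dialect;
# here both default to the base SQL dialect.
(tree,) = parser.parse(Tokenizer().tokenize(sql), sql=sql)
print(tree.sql())  # SELECT a FROM t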
def parse(
    self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens and returns a list of syntax trees, one tree
    per parsed SQL statement.

    Args:
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The list of the produced syntax trees.
    """
    return self._parse(
        parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
    )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
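For example, a token stream containing two statements yields two trees (a minimal sketch against the base dialect):

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT 1; SELECT 2"
trees = Parser().parse(Tokenizer().tokenize(sql), sql=sql)
print([tree.sql() for tree in trees])  # ['SELECT 1', 'SELECT 2']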
def parse_into(
    self,
    expression_types: exp.IntoType,
    raw_tokens: t.List[Token],
    sql: t.Optional[str] = None,
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens into a given Expression type. If a collection of Expression
    types is given instead, this method will try to parse the token list into each one
    of them, stopping at the first for which the parsing succeeds.

    Args:
        expression_types: The expression type(s) to try and parse the token list into.
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The target Expression.
    """
    errors = []
    for expression_type in ensure_list(expression_types):
        parser = self.EXPRESSION_PARSERS.get(expression_type)
        if not parser:
            raise TypeError(f"No parser registered for {expression_type}")

        try:
            return self._parse(parser, raw_tokens, sql)
        except ParseError as e:
            e.errors[0]["into_expression"] = expression_type
            errors.append(e)

    raise ParseError(
        f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
        errors=merge_errors(errors),
    ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
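A sketch of both paths; note how the raised ParseError carries the attempted target type in its error entries, as set in the except branch above:

from sqlglot import exp
from sqlglot.errors import ParseError
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

# Success: a bare name parses cleanly into an exp.Table.
(table,) = Parser().parse_into(exp.Table, Tokenizer().tokenize("db.t"), sql="db.t")
assert isinstance(table, exp.Table)

# Failure: a SELECT statement is not a table reference.
try:
    Parser().parse_into(exp.Table, Tokenizer().tokenize("SELECT 1"), sql="SELECT 1")
except ParseError as e:
    print(e.errors[0]["into_expression"])  # <class 'sqlglot.expressions.Table'>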
def check_errors(self) -> None:
    """Logs or raises any found errors, depending on the chosen error level setting."""
    if self.error_level == ErrorLevel.WARN:
        for error in self.errors:
            logger.error(str(error))
    elif self.error_level == ErrorLevel.RAISE and self.errors:
        raise ParseError(
            concat_messages(self.errors, self.max_errors),
            errors=merge_errors(self.errors),
        )
Logs or raises any found errors, depending on the chosen error level setting.
def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
    """
    Appends an error to the list of recorded errors or raises it, depending on the chosen
    error level setting.
    """
    token = token or self._curr or self._prev or Token.string("")
    start = token.start
    end = token.end + 1
    start_context = self.sql[max(start - self.error_message_context, 0) : start]
    highlight = self.sql[start:end]
    end_context = self.sql[end : end + self.error_message_context]

    error = ParseError.new(
        f"{message}. Line {token.line}, Col: {token.col}.\n"
        f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
        description=message,
        line=token.line,
        col=token.col,
        start_context=start_context,
        highlight=highlight,
        end_context=end_context,
    )

    if self.error_level == ErrorLevel.IMMEDIATE:
        raise error

    self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
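A sketch of the resulting behaviors, assuming an input the parser cannot fully consume: WARN logs through the "sqlglot" logger but keeps going, RAISE batches everything into one ParseError in check_errors, and IMMEDIATE (the default) raises from raise_error at the first problem:

from sqlglot.errors import ErrorLevel, ParseError
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

bad_sql = "SELECT FROM WHERE"

warn_parser = Parser(error_level=ErrorLevel.WARN)
warn_parser.parse(Tokenizer().tokenize(bad_sql), sql=bad_sql)  # errors logged, not raised
print(len(warn_parser.errors))  # recorded errors stay inspectable

try:
    Parser(error_level=ErrorLevel.RAISE).parse(Tokenizer().tokenize(bad_sql), sql=bad_sql)
except ParseError as e:
    print(e.errors[0]["line"], e.errors[0]["col"])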
def expression(
    self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
) -> E:
    """
    Creates a new, validated Expression.

    Args:
        exp_class: The expression class to instantiate.
        comments: An optional list of comments to attach to the expression.
        kwargs: The arguments to set for the expression along with their respective values.

    Returns:
        The target expression.
    """
    instance = exp_class(**kwargs)
    instance.add_comments(comments) if comments else self._add_comments(instance)
    return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
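A sketch of building a node through this helper; validation runs immediately, so a well-formed comparison comes back ready to render:

from sqlglot import exp
from sqlglot.parser import Parser

parser = Parser()
eq = parser.expression(exp.EQ, this=exp.column("a"), expression=exp.Literal.number(1))
print(eq.sql())  # a = 1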
def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
    """
    Validates an Expression, making sure that all its mandatory arguments are set.

    Args:
        expression: The expression to validate.
        args: An optional list of items that was used to instantiate the expression, if it's a Func.

    Returns:
        The validated expression.
    """
    if self.error_level != ErrorLevel.IGNORE:
        for error_message in expression.error_messages(args):
            self.raise_error(error_message)

    return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
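A sketch of validation failing: exp.EQ requires both sides, and with the default ErrorLevel.IMMEDIATE the missing mandatory argument is surfaced as a ParseError:

from sqlglot import exp
from sqlglot.errors import ParseError
from sqlglot.parser import Parser

incomplete = exp.EQ(this=exp.column("a"))  # the required 'expression' arg is absent

try:
    Parser().validate_expression(incomplete)
except ParseError as e:
    print(e.errors[0]["description"])  # names the missing 'expression' argument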
def parse_set_operation(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    start = self._index
    _, side_token, kind_token = self._parse_join_parts()

    side = side_token.text if side_token else None
    kind = kind_token.text if kind_token else None

    if not self._match_set(self.SET_OPERATIONS):
        self._retreat(start)
        return None

    token_type = self._prev.token_type

    if token_type == TokenType.UNION:
        operation: t.Type[exp.SetOperation] = exp.Union
    elif token_type == TokenType.EXCEPT:
        operation = exp.Except
    else:
        operation = exp.Intersect

    comments = self._prev.comments

    if self._match(TokenType.DISTINCT):
        distinct: t.Optional[bool] = True
    elif self._match(TokenType.ALL):
        distinct = False
    else:
        distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
        if distinct is None:
            self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

    by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq(
        "STRICT", "CORRESPONDING"
    )
    if self._match_text_seq("CORRESPONDING"):
        by_name = True

        if not side and not kind:
            kind = "INNER"

    on_column_list = None
    if by_name and self._match_texts(("ON", "BY")):
        on_column_list = self._parse_wrapped_csv(self._parse_column)

    expression = self._parse_select(nested=True, parse_set_operation=False)

    return self.expression(
        operation,
        comments=comments,
        this=this,
        distinct=distinct,
        by_name=by_name,
        expression=expression,
        side=side,
        kind=kind,
        on=on_column_list,
    )
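Seen from the public API, this is the method behind set-operation trees. A minimal sketch; the distinct flag comes from the explicit ALL here, otherwise the dialect's SET_OP_DISTINCT_BY_DEFAULT applies:

import sqlglot
from sqlglot import exp

union = sqlglot.parse_one("SELECT 1 UNION ALL SELECT 2")
assert isinstance(union, exp.Union)
print(union.args["distinct"])  # False, because ALL was spelled out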