sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5import itertools 6from collections import defaultdict 7 8from sqlglot import exp 9from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 10from sqlglot.helper import apply_index_offset, ensure_list, seq_get 11from sqlglot.time import format_time 12from sqlglot.tokens import Token, Tokenizer, TokenType 13from sqlglot.trie import TrieResult, in_trie, new_trie 14 15if t.TYPE_CHECKING: 16 from sqlglot._typing import E, Lit 17 from sqlglot.dialects.dialect import Dialect, DialectType 18 19 T = t.TypeVar("T") 20 TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor) 21 22logger = logging.getLogger("sqlglot") 23 24OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]] 25 26 27def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 28 if len(args) == 1 and args[0].is_star: 29 return exp.StarMap(this=args[0]) 30 31 keys = [] 32 values = [] 33 for i in range(0, len(args), 2): 34 keys.append(args[i]) 35 values.append(args[i + 1]) 36 37 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False)) 38 39 40def build_like(args: t.List) -> exp.Escape | exp.Like: 41 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 42 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 43 44 45def binary_range_parser( 46 expr_type: t.Type[exp.Expression], reverse_args: bool = False 47) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 48 def _parse_binary_range( 49 self: Parser, this: t.Optional[exp.Expression] 50 ) -> t.Optional[exp.Expression]: 51 expression = self._parse_bitwise() 52 if reverse_args: 53 this, expression = expression, this 54 return self._parse_escape(self.expression(expr_type, this=this, expression=expression)) 55 56 return _parse_binary_range 57 58 59def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 60 # Default argument order is 
base, expression 61 this = seq_get(args, 0) 62 expression = seq_get(args, 1) 63 64 if expression: 65 if not dialect.LOG_BASE_FIRST: 66 this, expression = expression, this 67 return exp.Log(this=this, expression=expression) 68 69 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this) 70 71 72def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex: 73 arg = seq_get(args, 0) 74 return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg) 75 76 77def build_lower(args: t.List) -> exp.Lower | exp.Hex: 78 # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation 79 arg = seq_get(args, 0) 80 return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg) 81 82 83def build_upper(args: t.List) -> exp.Upper | exp.Hex: 84 # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation 85 arg = seq_get(args, 0) 86 return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg) 87 88 89def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 90 def _builder(args: t.List, dialect: Dialect) -> E: 91 expression = expr_type( 92 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 93 ) 94 if len(args) > 2 and expr_type is exp.JSONExtract: 95 expression.set("expressions", args[2:]) 96 97 return expression 98 99 return _builder 100 101 102def build_mod(args: t.List) -> exp.Mod: 103 this = seq_get(args, 0) 104 expression = seq_get(args, 1) 105 106 # Wrap the operands if they are binary nodes, e.g. 
MOD(a + 1, 7) -> (a + 1) % 7 107 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 108 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 109 110 return exp.Mod(this=this, expression=expression) 111 112 113def build_pad(args: t.List, is_left: bool = True): 114 return exp.Pad( 115 this=seq_get(args, 0), 116 expression=seq_get(args, 1), 117 fill_pattern=seq_get(args, 2), 118 is_left=is_left, 119 ) 120 121 122def build_array_constructor( 123 exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect 124) -> exp.Expression: 125 array_exp = exp_class(expressions=args) 126 127 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 128 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 129 130 return array_exp 131 132 133def build_convert_timezone( 134 args: t.List, default_source_tz: t.Optional[str] = None 135) -> t.Union[exp.ConvertTimezone, exp.Anonymous]: 136 if len(args) == 2: 137 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 138 return exp.ConvertTimezone( 139 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 140 ) 141 142 return exp.ConvertTimezone.from_arg_list(args) 143 144 145def build_trim(args: t.List, is_left: bool = True): 146 return exp.Trim( 147 this=seq_get(args, 0), 148 expression=seq_get(args, 1), 149 position="LEADING" if is_left else "TRAILING", 150 ) 151 152 153def build_coalesce(args: t.List, is_nvl: t.Optional[bool] = None) -> exp.Coalesce: 154 return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl) 155 156 157def build_locate_strposition(args: t.List): 158 return exp.StrPosition( 159 this=seq_get(args, 1), 160 substr=seq_get(args, 0), 161 position=seq_get(args, 2), 162 ) 163 164 165class _Parser(type): 166 def __new__(cls, clsname, bases, attrs): 167 klass = super().__new__(cls, clsname, bases, attrs) 168 169 klass.SHOW_TRIE = new_trie(key.split(" 
") for key in klass.SHOW_PARSERS) 170 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 171 172 return klass 173 174 175class Parser(metaclass=_Parser): 176 """ 177 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 178 179 Args: 180 error_level: The desired error level. 181 Default: ErrorLevel.IMMEDIATE 182 error_message_context: The amount of context to capture from a query string when displaying 183 the error message (in number of characters). 184 Default: 100 185 max_errors: Maximum number of error messages to include in a raised ParseError. 186 This is only relevant if error_level is ErrorLevel.RAISE. 187 Default: 3 188 """ 189 190 FUNCTIONS: t.Dict[str, t.Callable] = { 191 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 192 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 193 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 194 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 195 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 196 ), 197 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 198 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 199 ), 200 "CHAR": lambda args: exp.Chr(expressions=args), 201 "CHR": lambda args: exp.Chr(expressions=args), 202 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 203 "CONCAT": lambda args, dialect: exp.Concat( 204 expressions=args, 205 safe=not dialect.STRICT_STRING_CONCAT, 206 coalesce=dialect.CONCAT_COALESCE, 207 ), 208 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 209 expressions=args, 210 safe=not dialect.STRICT_STRING_CONCAT, 211 coalesce=dialect.CONCAT_COALESCE, 212 ), 213 "CONVERT_TIMEZONE": build_convert_timezone, 214 "DATE_TO_DATE_STR": lambda args: exp.Cast( 215 this=seq_get(args, 0), 216 to=exp.DataType(this=exp.DataType.Type.TEXT), 217 ), 218 "GENERATE_DATE_ARRAY": lambda 
args: exp.GenerateDateArray( 219 start=seq_get(args, 0), 220 end=seq_get(args, 1), 221 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 222 ), 223 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 224 "HEX": build_hex, 225 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 226 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 227 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 228 "LIKE": build_like, 229 "LOG": build_logarithm, 230 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 231 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 232 "LOWER": build_lower, 233 "LPAD": lambda args: build_pad(args), 234 "LEFTPAD": lambda args: build_pad(args), 235 "LTRIM": lambda args: build_trim(args), 236 "MOD": build_mod, 237 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 238 "RPAD": lambda args: build_pad(args, is_left=False), 239 "RTRIM": lambda args: build_trim(args, is_left=False), 240 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 241 if len(args) != 2 242 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 243 "STRPOS": exp.StrPosition.from_arg_list, 244 "CHARINDEX": lambda args: build_locate_strposition(args), 245 "INSTR": exp.StrPosition.from_arg_list, 246 "LOCATE": lambda args: build_locate_strposition(args), 247 "TIME_TO_TIME_STR": lambda args: exp.Cast( 248 this=seq_get(args, 0), 249 to=exp.DataType(this=exp.DataType.Type.TEXT), 250 ), 251 "TO_HEX": build_hex, 252 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 253 this=exp.Cast( 254 this=seq_get(args, 0), 255 to=exp.DataType(this=exp.DataType.Type.TEXT), 256 ), 257 start=exp.Literal.number(1), 258 length=exp.Literal.number(10), 259 ), 260 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 261 "UPPER": 
build_upper, 262 "VAR_MAP": build_var_map, 263 } 264 265 NO_PAREN_FUNCTIONS = { 266 TokenType.CURRENT_DATE: exp.CurrentDate, 267 TokenType.CURRENT_DATETIME: exp.CurrentDate, 268 TokenType.CURRENT_TIME: exp.CurrentTime, 269 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 270 TokenType.CURRENT_USER: exp.CurrentUser, 271 } 272 273 STRUCT_TYPE_TOKENS = { 274 TokenType.NESTED, 275 TokenType.OBJECT, 276 TokenType.STRUCT, 277 TokenType.UNION, 278 } 279 280 NESTED_TYPE_TOKENS = { 281 TokenType.ARRAY, 282 TokenType.LIST, 283 TokenType.LOWCARDINALITY, 284 TokenType.MAP, 285 TokenType.NULLABLE, 286 TokenType.RANGE, 287 *STRUCT_TYPE_TOKENS, 288 } 289 290 ENUM_TYPE_TOKENS = { 291 TokenType.DYNAMIC, 292 TokenType.ENUM, 293 TokenType.ENUM8, 294 TokenType.ENUM16, 295 } 296 297 AGGREGATE_TYPE_TOKENS = { 298 TokenType.AGGREGATEFUNCTION, 299 TokenType.SIMPLEAGGREGATEFUNCTION, 300 } 301 302 TYPE_TOKENS = { 303 TokenType.BIT, 304 TokenType.BOOLEAN, 305 TokenType.TINYINT, 306 TokenType.UTINYINT, 307 TokenType.SMALLINT, 308 TokenType.USMALLINT, 309 TokenType.INT, 310 TokenType.UINT, 311 TokenType.BIGINT, 312 TokenType.UBIGINT, 313 TokenType.INT128, 314 TokenType.UINT128, 315 TokenType.INT256, 316 TokenType.UINT256, 317 TokenType.MEDIUMINT, 318 TokenType.UMEDIUMINT, 319 TokenType.FIXEDSTRING, 320 TokenType.FLOAT, 321 TokenType.DOUBLE, 322 TokenType.UDOUBLE, 323 TokenType.CHAR, 324 TokenType.NCHAR, 325 TokenType.VARCHAR, 326 TokenType.NVARCHAR, 327 TokenType.BPCHAR, 328 TokenType.TEXT, 329 TokenType.MEDIUMTEXT, 330 TokenType.LONGTEXT, 331 TokenType.BLOB, 332 TokenType.MEDIUMBLOB, 333 TokenType.LONGBLOB, 334 TokenType.BINARY, 335 TokenType.VARBINARY, 336 TokenType.JSON, 337 TokenType.JSONB, 338 TokenType.INTERVAL, 339 TokenType.TINYBLOB, 340 TokenType.TINYTEXT, 341 TokenType.TIME, 342 TokenType.TIMETZ, 343 TokenType.TIMESTAMP, 344 TokenType.TIMESTAMP_S, 345 TokenType.TIMESTAMP_MS, 346 TokenType.TIMESTAMP_NS, 347 TokenType.TIMESTAMPTZ, 348 TokenType.TIMESTAMPLTZ, 349 
TokenType.TIMESTAMPNTZ, 350 TokenType.DATETIME, 351 TokenType.DATETIME2, 352 TokenType.DATETIME64, 353 TokenType.SMALLDATETIME, 354 TokenType.DATE, 355 TokenType.DATE32, 356 TokenType.INT4RANGE, 357 TokenType.INT4MULTIRANGE, 358 TokenType.INT8RANGE, 359 TokenType.INT8MULTIRANGE, 360 TokenType.NUMRANGE, 361 TokenType.NUMMULTIRANGE, 362 TokenType.TSRANGE, 363 TokenType.TSMULTIRANGE, 364 TokenType.TSTZRANGE, 365 TokenType.TSTZMULTIRANGE, 366 TokenType.DATERANGE, 367 TokenType.DATEMULTIRANGE, 368 TokenType.DECIMAL, 369 TokenType.DECIMAL32, 370 TokenType.DECIMAL64, 371 TokenType.DECIMAL128, 372 TokenType.DECIMAL256, 373 TokenType.UDECIMAL, 374 TokenType.BIGDECIMAL, 375 TokenType.UUID, 376 TokenType.GEOGRAPHY, 377 TokenType.GEOMETRY, 378 TokenType.POINT, 379 TokenType.RING, 380 TokenType.LINESTRING, 381 TokenType.MULTILINESTRING, 382 TokenType.POLYGON, 383 TokenType.MULTIPOLYGON, 384 TokenType.HLLSKETCH, 385 TokenType.HSTORE, 386 TokenType.PSEUDO_TYPE, 387 TokenType.SUPER, 388 TokenType.SERIAL, 389 TokenType.SMALLSERIAL, 390 TokenType.BIGSERIAL, 391 TokenType.XML, 392 TokenType.YEAR, 393 TokenType.USERDEFINED, 394 TokenType.MONEY, 395 TokenType.SMALLMONEY, 396 TokenType.ROWVERSION, 397 TokenType.IMAGE, 398 TokenType.VARIANT, 399 TokenType.VECTOR, 400 TokenType.VOID, 401 TokenType.OBJECT, 402 TokenType.OBJECT_IDENTIFIER, 403 TokenType.INET, 404 TokenType.IPADDRESS, 405 TokenType.IPPREFIX, 406 TokenType.IPV4, 407 TokenType.IPV6, 408 TokenType.UNKNOWN, 409 TokenType.NOTHING, 410 TokenType.NULL, 411 TokenType.NAME, 412 TokenType.TDIGEST, 413 TokenType.DYNAMIC, 414 *ENUM_TYPE_TOKENS, 415 *NESTED_TYPE_TOKENS, 416 *AGGREGATE_TYPE_TOKENS, 417 } 418 419 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 420 TokenType.BIGINT: TokenType.UBIGINT, 421 TokenType.INT: TokenType.UINT, 422 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 423 TokenType.SMALLINT: TokenType.USMALLINT, 424 TokenType.TINYINT: TokenType.UTINYINT, 425 TokenType.DECIMAL: TokenType.UDECIMAL, 426 TokenType.DOUBLE: TokenType.UDOUBLE, 427 
} 428 429 SUBQUERY_PREDICATES = { 430 TokenType.ANY: exp.Any, 431 TokenType.ALL: exp.All, 432 TokenType.EXISTS: exp.Exists, 433 TokenType.SOME: exp.Any, 434 } 435 436 RESERVED_TOKENS = { 437 *Tokenizer.SINGLE_TOKENS.values(), 438 TokenType.SELECT, 439 } - {TokenType.IDENTIFIER} 440 441 DB_CREATABLES = { 442 TokenType.DATABASE, 443 TokenType.DICTIONARY, 444 TokenType.FILE_FORMAT, 445 TokenType.MODEL, 446 TokenType.NAMESPACE, 447 TokenType.SCHEMA, 448 TokenType.SEQUENCE, 449 TokenType.SINK, 450 TokenType.SOURCE, 451 TokenType.STAGE, 452 TokenType.STORAGE_INTEGRATION, 453 TokenType.STREAMLIT, 454 TokenType.TABLE, 455 TokenType.TAG, 456 TokenType.VIEW, 457 TokenType.WAREHOUSE, 458 } 459 460 CREATABLES = { 461 TokenType.COLUMN, 462 TokenType.CONSTRAINT, 463 TokenType.FOREIGN_KEY, 464 TokenType.FUNCTION, 465 TokenType.INDEX, 466 TokenType.PROCEDURE, 467 *DB_CREATABLES, 468 } 469 470 ALTERABLES = { 471 TokenType.INDEX, 472 TokenType.TABLE, 473 TokenType.VIEW, 474 } 475 476 # Tokens that can represent identifiers 477 ID_VAR_TOKENS = { 478 TokenType.ALL, 479 TokenType.ATTACH, 480 TokenType.VAR, 481 TokenType.ANTI, 482 TokenType.APPLY, 483 TokenType.ASC, 484 TokenType.ASOF, 485 TokenType.AUTO_INCREMENT, 486 TokenType.BEGIN, 487 TokenType.BPCHAR, 488 TokenType.CACHE, 489 TokenType.CASE, 490 TokenType.COLLATE, 491 TokenType.COMMAND, 492 TokenType.COMMENT, 493 TokenType.COMMIT, 494 TokenType.CONSTRAINT, 495 TokenType.COPY, 496 TokenType.CUBE, 497 TokenType.CURRENT_SCHEMA, 498 TokenType.DEFAULT, 499 TokenType.DELETE, 500 TokenType.DESC, 501 TokenType.DESCRIBE, 502 TokenType.DETACH, 503 TokenType.DICTIONARY, 504 TokenType.DIV, 505 TokenType.END, 506 TokenType.EXECUTE, 507 TokenType.EXPORT, 508 TokenType.ESCAPE, 509 TokenType.FALSE, 510 TokenType.FIRST, 511 TokenType.FILTER, 512 TokenType.FINAL, 513 TokenType.FORMAT, 514 TokenType.FULL, 515 TokenType.GET, 516 TokenType.IDENTIFIER, 517 TokenType.IS, 518 TokenType.ISNULL, 519 TokenType.INTERVAL, 520 TokenType.KEEP, 521 
TokenType.KILL, 522 TokenType.LEFT, 523 TokenType.LIMIT, 524 TokenType.LOAD, 525 TokenType.MERGE, 526 TokenType.NATURAL, 527 TokenType.NEXT, 528 TokenType.OFFSET, 529 TokenType.OPERATOR, 530 TokenType.ORDINALITY, 531 TokenType.OVERLAPS, 532 TokenType.OVERWRITE, 533 TokenType.PARTITION, 534 TokenType.PERCENT, 535 TokenType.PIVOT, 536 TokenType.PRAGMA, 537 TokenType.PUT, 538 TokenType.RANGE, 539 TokenType.RECURSIVE, 540 TokenType.REFERENCES, 541 TokenType.REFRESH, 542 TokenType.RENAME, 543 TokenType.REPLACE, 544 TokenType.RIGHT, 545 TokenType.ROLLUP, 546 TokenType.ROW, 547 TokenType.ROWS, 548 TokenType.SEMI, 549 TokenType.SET, 550 TokenType.SETTINGS, 551 TokenType.SHOW, 552 TokenType.TEMPORARY, 553 TokenType.TOP, 554 TokenType.TRUE, 555 TokenType.TRUNCATE, 556 TokenType.UNIQUE, 557 TokenType.UNNEST, 558 TokenType.UNPIVOT, 559 TokenType.UPDATE, 560 TokenType.USE, 561 TokenType.VOLATILE, 562 TokenType.WINDOW, 563 *CREATABLES, 564 *SUBQUERY_PREDICATES, 565 *TYPE_TOKENS, 566 *NO_PAREN_FUNCTIONS, 567 } 568 ID_VAR_TOKENS.remove(TokenType.UNION) 569 570 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 571 TokenType.ANTI, 572 TokenType.APPLY, 573 TokenType.ASOF, 574 TokenType.FULL, 575 TokenType.LEFT, 576 TokenType.LOCK, 577 TokenType.NATURAL, 578 TokenType.RIGHT, 579 TokenType.SEMI, 580 TokenType.WINDOW, 581 } 582 583 ALIAS_TOKENS = ID_VAR_TOKENS 584 585 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS 586 587 ARRAY_CONSTRUCTORS = { 588 "ARRAY": exp.Array, 589 "LIST": exp.List, 590 } 591 592 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 593 594 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 595 596 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 597 598 FUNC_TOKENS = { 599 TokenType.COLLATE, 600 TokenType.COMMAND, 601 TokenType.CURRENT_DATE, 602 TokenType.CURRENT_DATETIME, 603 TokenType.CURRENT_SCHEMA, 604 TokenType.CURRENT_TIMESTAMP, 605 TokenType.CURRENT_TIME, 606 TokenType.CURRENT_USER, 607 TokenType.FILTER, 608 TokenType.FIRST, 609 TokenType.FORMAT, 610 
TokenType.GLOB, 611 TokenType.IDENTIFIER, 612 TokenType.INDEX, 613 TokenType.ISNULL, 614 TokenType.ILIKE, 615 TokenType.INSERT, 616 TokenType.LIKE, 617 TokenType.MERGE, 618 TokenType.NEXT, 619 TokenType.OFFSET, 620 TokenType.PRIMARY_KEY, 621 TokenType.RANGE, 622 TokenType.REPLACE, 623 TokenType.RLIKE, 624 TokenType.ROW, 625 TokenType.UNNEST, 626 TokenType.VAR, 627 TokenType.LEFT, 628 TokenType.RIGHT, 629 TokenType.SEQUENCE, 630 TokenType.DATE, 631 TokenType.DATETIME, 632 TokenType.TABLE, 633 TokenType.TIMESTAMP, 634 TokenType.TIMESTAMPTZ, 635 TokenType.TRUNCATE, 636 TokenType.WINDOW, 637 TokenType.XOR, 638 *TYPE_TOKENS, 639 *SUBQUERY_PREDICATES, 640 } 641 642 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 643 TokenType.AND: exp.And, 644 } 645 646 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 647 TokenType.COLON_EQ: exp.PropertyEQ, 648 } 649 650 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 651 TokenType.OR: exp.Or, 652 } 653 654 EQUALITY = { 655 TokenType.EQ: exp.EQ, 656 TokenType.NEQ: exp.NEQ, 657 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 658 } 659 660 COMPARISON = { 661 TokenType.GT: exp.GT, 662 TokenType.GTE: exp.GTE, 663 TokenType.LT: exp.LT, 664 TokenType.LTE: exp.LTE, 665 } 666 667 BITWISE = { 668 TokenType.AMP: exp.BitwiseAnd, 669 TokenType.CARET: exp.BitwiseXor, 670 TokenType.PIPE: exp.BitwiseOr, 671 } 672 673 TERM = { 674 TokenType.DASH: exp.Sub, 675 TokenType.PLUS: exp.Add, 676 TokenType.MOD: exp.Mod, 677 TokenType.COLLATE: exp.Collate, 678 } 679 680 FACTOR = { 681 TokenType.DIV: exp.IntDiv, 682 TokenType.LR_ARROW: exp.Distance, 683 TokenType.SLASH: exp.Div, 684 TokenType.STAR: exp.Mul, 685 } 686 687 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 688 689 TIMES = { 690 TokenType.TIME, 691 TokenType.TIMETZ, 692 } 693 694 TIMESTAMPS = { 695 TokenType.TIMESTAMP, 696 TokenType.TIMESTAMPNTZ, 697 TokenType.TIMESTAMPTZ, 698 TokenType.TIMESTAMPLTZ, 699 *TIMES, 700 } 701 702 SET_OPERATIONS = { 703 TokenType.UNION, 704 
TokenType.INTERSECT, 705 TokenType.EXCEPT, 706 } 707 708 JOIN_METHODS = { 709 TokenType.ASOF, 710 TokenType.NATURAL, 711 TokenType.POSITIONAL, 712 } 713 714 JOIN_SIDES = { 715 TokenType.LEFT, 716 TokenType.RIGHT, 717 TokenType.FULL, 718 } 719 720 JOIN_KINDS = { 721 TokenType.ANTI, 722 TokenType.CROSS, 723 TokenType.INNER, 724 TokenType.OUTER, 725 TokenType.SEMI, 726 TokenType.STRAIGHT_JOIN, 727 } 728 729 JOIN_HINTS: t.Set[str] = set() 730 731 LAMBDAS = { 732 TokenType.ARROW: lambda self, expressions: self.expression( 733 exp.Lambda, 734 this=self._replace_lambda( 735 self._parse_assignment(), 736 expressions, 737 ), 738 expressions=expressions, 739 ), 740 TokenType.FARROW: lambda self, expressions: self.expression( 741 exp.Kwarg, 742 this=exp.var(expressions[0].name), 743 expression=self._parse_assignment(), 744 ), 745 } 746 747 COLUMN_OPERATORS = { 748 TokenType.DOT: None, 749 TokenType.DOTCOLON: lambda self, this, to: self.expression( 750 exp.JSONCast, 751 this=this, 752 to=to, 753 ), 754 TokenType.DCOLON: lambda self, this, to: self.expression( 755 exp.Cast if self.STRICT_CAST else exp.TryCast, 756 this=this, 757 to=to, 758 ), 759 TokenType.ARROW: lambda self, this, path: self.expression( 760 exp.JSONExtract, 761 this=this, 762 expression=self.dialect.to_json_path(path), 763 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 764 ), 765 TokenType.DARROW: lambda self, this, path: self.expression( 766 exp.JSONExtractScalar, 767 this=this, 768 expression=self.dialect.to_json_path(path), 769 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 770 ), 771 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 772 exp.JSONBExtract, 773 this=this, 774 expression=path, 775 ), 776 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 777 exp.JSONBExtractScalar, 778 this=this, 779 expression=path, 780 ), 781 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 782 exp.JSONBContains, 783 this=this, 784 expression=key, 785 ), 786 } 787 788 
EXPRESSION_PARSERS = { 789 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 790 exp.Column: lambda self: self._parse_column(), 791 exp.Condition: lambda self: self._parse_assignment(), 792 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 793 exp.Expression: lambda self: self._parse_expression(), 794 exp.From: lambda self: self._parse_from(joins=True), 795 exp.Group: lambda self: self._parse_group(), 796 exp.Having: lambda self: self._parse_having(), 797 exp.Hint: lambda self: self._parse_hint_body(), 798 exp.Identifier: lambda self: self._parse_id_var(), 799 exp.Join: lambda self: self._parse_join(), 800 exp.Lambda: lambda self: self._parse_lambda(), 801 exp.Lateral: lambda self: self._parse_lateral(), 802 exp.Limit: lambda self: self._parse_limit(), 803 exp.Offset: lambda self: self._parse_offset(), 804 exp.Order: lambda self: self._parse_order(), 805 exp.Ordered: lambda self: self._parse_ordered(), 806 exp.Properties: lambda self: self._parse_properties(), 807 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 808 exp.Qualify: lambda self: self._parse_qualify(), 809 exp.Returning: lambda self: self._parse_returning(), 810 exp.Select: lambda self: self._parse_select(), 811 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 812 exp.Table: lambda self: self._parse_table_parts(), 813 exp.TableAlias: lambda self: self._parse_table_alias(), 814 exp.Tuple: lambda self: self._parse_value(values=False), 815 exp.Whens: lambda self: self._parse_when_matched(), 816 exp.Where: lambda self: self._parse_where(), 817 exp.Window: lambda self: self._parse_named_window(), 818 exp.With: lambda self: self._parse_with(), 819 "JOIN_TYPE": lambda self: self._parse_join_parts(), 820 } 821 822 STATEMENT_PARSERS = { 823 TokenType.ALTER: lambda self: self._parse_alter(), 824 TokenType.ANALYZE: lambda self: self._parse_analyze(), 825 TokenType.BEGIN: lambda self: self._parse_transaction(), 
826 TokenType.CACHE: lambda self: self._parse_cache(), 827 TokenType.COMMENT: lambda self: self._parse_comment(), 828 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 829 TokenType.COPY: lambda self: self._parse_copy(), 830 TokenType.CREATE: lambda self: self._parse_create(), 831 TokenType.DELETE: lambda self: self._parse_delete(), 832 TokenType.DESC: lambda self: self._parse_describe(), 833 TokenType.DESCRIBE: lambda self: self._parse_describe(), 834 TokenType.DROP: lambda self: self._parse_drop(), 835 TokenType.GRANT: lambda self: self._parse_grant(), 836 TokenType.INSERT: lambda self: self._parse_insert(), 837 TokenType.KILL: lambda self: self._parse_kill(), 838 TokenType.LOAD: lambda self: self._parse_load(), 839 TokenType.MERGE: lambda self: self._parse_merge(), 840 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 841 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 842 TokenType.REFRESH: lambda self: self._parse_refresh(), 843 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 844 TokenType.SET: lambda self: self._parse_set(), 845 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 846 TokenType.UNCACHE: lambda self: self._parse_uncache(), 847 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 848 TokenType.UPDATE: lambda self: self._parse_update(), 849 TokenType.USE: lambda self: self._parse_use(), 850 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 851 } 852 853 UNARY_PARSERS = { 854 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 855 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 856 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 857 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 858 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 859 
TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 860 } 861 862 STRING_PARSERS = { 863 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 864 exp.RawString, this=token.text 865 ), 866 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 867 exp.National, this=token.text 868 ), 869 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 870 TokenType.STRING: lambda self, token: self.expression( 871 exp.Literal, this=token.text, is_string=True 872 ), 873 TokenType.UNICODE_STRING: lambda self, token: self.expression( 874 exp.UnicodeString, 875 this=token.text, 876 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 877 ), 878 } 879 880 NUMERIC_PARSERS = { 881 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 882 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 883 TokenType.HEX_STRING: lambda self, token: self.expression( 884 exp.HexString, 885 this=token.text, 886 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 887 ), 888 TokenType.NUMBER: lambda self, token: self.expression( 889 exp.Literal, this=token.text, is_string=False 890 ), 891 } 892 893 PRIMARY_PARSERS = { 894 **STRING_PARSERS, 895 **NUMERIC_PARSERS, 896 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 897 TokenType.NULL: lambda self, _: self.expression(exp.Null), 898 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 899 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 900 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 901 TokenType.STAR: lambda self, _: self._parse_star_ops(), 902 } 903 904 PLACEHOLDER_PARSERS = { 905 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 906 TokenType.PARAMETER: lambda self: self._parse_parameter(), 907 TokenType.COLON: lambda self: ( 908 
self.expression(exp.Placeholder, this=self._prev.text) 909 if self._match_set(self.COLON_PLACEHOLDER_TOKENS) 910 else None 911 ), 912 } 913 914 RANGE_PARSERS = { 915 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 916 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 917 TokenType.GLOB: binary_range_parser(exp.Glob), 918 TokenType.ILIKE: binary_range_parser(exp.ILike), 919 TokenType.IN: lambda self, this: self._parse_in(this), 920 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 921 TokenType.IS: lambda self, this: self._parse_is(this), 922 TokenType.LIKE: binary_range_parser(exp.Like), 923 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 924 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 925 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 926 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 927 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 928 } 929 930 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 931 "ALLOWED_VALUES": lambda self: self.expression( 932 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 933 ), 934 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 935 "AUTO": lambda self: self._parse_auto_property(), 936 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 937 "BACKUP": lambda self: self.expression( 938 exp.BackupProperty, this=self._parse_var(any_token=True) 939 ), 940 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 941 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 942 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 943 "CHECKSUM": lambda self: self._parse_checksum(), 944 "CLUSTER BY": lambda self: self._parse_cluster(), 945 "CLUSTERED": lambda self: self._parse_clustered_by(), 946 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 947 exp.CollateProperty, 
**kwargs 948 ), 949 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 950 "CONTAINS": lambda self: self._parse_contains_property(), 951 "COPY": lambda self: self._parse_copy_property(), 952 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 953 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 954 "DEFINER": lambda self: self._parse_definer(), 955 "DETERMINISTIC": lambda self: self.expression( 956 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 957 ), 958 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 959 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 960 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 961 "DISTKEY": lambda self: self._parse_distkey(), 962 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 963 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 964 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 965 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 966 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 967 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 968 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 969 "FREESPACE": lambda self: self._parse_freespace(), 970 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 971 "HEAP": lambda self: self.expression(exp.HeapProperty), 972 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 973 "IMMUTABLE": lambda self: self.expression( 974 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 975 ), 976 "INHERITS": lambda self: self.expression( 977 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 978 ), 979 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 980 "JOURNAL": lambda self, **kwargs: 
self._parse_journal(**kwargs),
    "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
    "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
    "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
    "LIKE": lambda self: self._parse_create_like(),
    "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
    "LOCK": lambda self: self._parse_locking(),
    "LOCKING": lambda self: self._parse_locking(),
    "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
    "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
    "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
    "MODIFIES": lambda self: self._parse_modifies_property(),
    "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
    "NO": lambda self: self._parse_no_property(),
    "ON": lambda self: self._parse_on_property(),
    "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
    "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
    "PARTITION": lambda self: self._parse_partitioned_of(),
    "PARTITION BY": lambda self: self._parse_partitioned_by(),
    "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
    "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
    "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
    "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
    "READS": lambda self: self._parse_reads_property(),
    "REMOTE": lambda self: self._parse_remote_with_connection(),
    "RETURNS": lambda self: self._parse_returns(),
    "STRICT": lambda self: self.expression(exp.StrictProperty),
    "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
    "ROW": lambda self: self._parse_row(),
    "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
    "SAMPLE": lambda self: self.expression(
        exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
    ),
    "SECURE": lambda self: self.expression(exp.SecureProperty),
    "SECURITY": lambda self: self._parse_security(),
    "SET": lambda self: self.expression(exp.SetProperty, multi=False),
    "SETTINGS": lambda self: self._parse_settings_property(),
    "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
    "SORTKEY": lambda self: self._parse_sortkey(),
    "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
    "STABLE": lambda self: self.expression(
        exp.StabilityProperty, this=exp.Literal.string("STABLE")
    ),
    "STORED": lambda self: self._parse_stored(),
    "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
    "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
    "TEMP": lambda self: self.expression(exp.TemporaryProperty),
    "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
    "TO": lambda self: self._parse_to_table(),
    "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
    "TRANSFORM": lambda self: self.expression(
        exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
    ),
    "TTL": lambda self: self._parse_ttl(),
    "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
    "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
    "VOLATILE": lambda self: self._parse_volatile_property(),
    "WITH": lambda self: self._parse_with_property(),
}

# Constraint keyword text -> callable that receives the parser and returns the
# expression built for that constraint. Several keywords share a parser
# (e.g. AUTOINCREMENT / AUTO_INCREMENT / IDENTITY).
# NOTE(review): presumably each callable runs after its keyword has been consumed -- confirm at the call site.
CONSTRAINT_PARSERS = {
    "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
    "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
    "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
    "CHARACTER SET": lambda self: self.expression(
        exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
    ),
    "CHECK": lambda self: self.expression(
        exp.CheckColumnConstraint,
        this=self._parse_wrapped(self._parse_assignment),
        enforced=self._match_text_seq("ENFORCED"),
    ),
    "COLLATE": lambda self: self.expression(
        exp.CollateColumnConstraint,
        this=self._parse_identifier() or self._parse_column(),
    ),
    "COMMENT": lambda self: self.expression(
        exp.CommentColumnConstraint, this=self._parse_string()
    ),
    "COMPRESS": lambda self: self._parse_compress(),
    "CLUSTERED": lambda self: self.expression(
        exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
    ),
    "NONCLUSTERED": lambda self: self.expression(
        exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
    ),
    "DEFAULT": lambda self: self.expression(
        exp.DefaultColumnConstraint, this=self._parse_bitwise()
    ),
    "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
    "EPHEMERAL": lambda self: self.expression(
        exp.EphemeralColumnConstraint, this=self._parse_bitwise()
    ),
    "EXCLUDE": lambda self: self.expression(
        exp.ExcludeColumnConstraint, this=self._parse_index_params()
    ),
    "FOREIGN KEY": lambda self: self._parse_foreign_key(),
    "FORMAT": lambda self: self.expression(
        exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
    ),
    "GENERATED": lambda self: self._parse_generated_as_identity(),
    "IDENTITY": lambda self: self._parse_auto_increment(),
    "INLINE": lambda self: self._parse_inline(),
    "LIKE": lambda self: self._parse_create_like(),
    "NOT": lambda self: self._parse_not_constraint(),
    "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
    # ON UPDATE <function> yields an OnUpdateColumnConstraint; any other ON <id>
    # falls through to an OnProperty.
    "ON": lambda self: (
        self._match(TokenType.UPDATE)
        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
    )
    or self.expression(exp.OnProperty, this=self._parse_id_var()),
    "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
    "PERIOD": lambda self: self._parse_period_for_system_time(),
    "PRIMARY KEY": lambda self: self._parse_primary_key(),
    "REFERENCES": lambda self: self._parse_references(match=False),
    "TITLE": lambda self: self.expression(
        exp.TitleColumnConstraint, this=self._parse_var_or_string()
    ),
    "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
    "UNIQUE": lambda self: self._parse_unique(),
    "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    "WATERMARK": lambda self: self.expression(
        exp.WatermarkColumnConstraint,
        this=self._match(TokenType.FOR) and self._parse_column(),
        expression=self._match(TokenType.ALIAS) and self._parse_disjunction(),
    ),
    "WITH": lambda self: self.expression(
        exp.Properties, expressions=self._parse_wrapped_properties()
    ),
    "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
    "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
}

def _parse_partitioned_by_bucket_or_truncate(self) -> exp.Expression:
    """
    Parses the parenthesized arguments of a `BUCKET(...)` / `TRUNCATE(...)` partition transform.

    The node class is chosen from the text of the previously consumed token: `BUCKET` gives
    `exp.PartitionedByBucket`, anything else gives `exp.PartitionByTruncate`.

    Returns:
        The transform expression, with the first argument stored in `this` and the second
        in `expression` (swapped when the first argument is a literal, see below).
    """
    klass = (
        exp.PartitionedByBucket
        if self._prev.text.upper() == "BUCKET"
        else exp.PartitionByTruncate
    )

    args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column())
    this, expression = seq_get(args, 0), seq_get(args, 1)

    if isinstance(this, exp.Literal):
        # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order
        #  - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)`
        #  - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)`
        # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)`
        #
        # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning
        # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
        this, expression = expression, this

    return self.expression(klass, this=this, expression=expression)

# Keyword text -> callable that receives the parser and parses the matching ALTER action.
ALTER_PARSERS = {
    "ADD": lambda self: self._parse_alter_table_add(),
    "AS": lambda self: self._parse_select(),
    "ALTER": lambda self: self._parse_alter_table_alter(),
    "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
    "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
    "DROP": lambda self: self._parse_alter_table_drop(),
    "RENAME": lambda self: self._parse_alter_table_rename(),
    "SET": lambda self: self._parse_alter_table_set(),
    "SWAP": lambda self: self.expression(
        exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
    ),
}

# Keyword text -> callable for the dist-style / sort-key alterations.
# NOTE(review): the name suggests these follow `ALTER TABLE ... ALTER` -- confirm against the caller.
ALTER_ALTER_PARSERS = {
    "DISTKEY": lambda self: self._parse_alter_diststyle(),
    "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
    "SORTKEY": lambda self: self._parse_alter_sortkey(),
    "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
}

# Constraint keywords that may appear in a schema without a preceding constraint name.
# NOTE(review): inferred from the constant's name only -- confirm where it is consumed.
SCHEMA_UNNAMED_CONSTRAINTS = {
    "CHECK",
    "EXCLUDE",
    "FOREIGN KEY",
    "LIKE",
    "PERIOD",
    "PRIMARY KEY",
    "UNIQUE",
    "WATERMARK",
    "BUCKET",
    "TRUNCATE",
}

# Keyword text -> callable for function-like constructs whose parsers do not take a
# parenthesized argument list.
NO_PAREN_FUNCTION_PARSERS = {
    "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
    "CASE": lambda self: self._parse_case(),
    "CONNECT_BY_ROOT": lambda self: self.expression(
        exp.ConnectByRoot, this=self._parse_column()
    ),
    "IF": lambda self: self._parse_if(),
}

INVALID_FUNC_NAME_TOKENS = {
    TokenType.IDENTIFIER,
    TokenType.STRING,
}

FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}
KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

# Function name -> callable that receives the parser and parses that function's
# non-standard argument syntax. ARG_MAX / ARG_MIN are registered under every SQL
# name their expression classes expose.
FUNCTION_PARSERS = {
    **{
        name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names()
    },
    **{
        name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names()
    },
    "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
    "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil),
    "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
    "DECODE": lambda self: self._parse_decode(),
    "EXTRACT": lambda self: self._parse_extract(),
    "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor),
    "GAP_FILL": lambda self: self._parse_gap_fill(),
    "JSON_OBJECT": lambda self: self._parse_json_object(),
    "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
    "JSON_TABLE": lambda self: self._parse_json_table(),
    "MATCH": lambda self: self._parse_match_against(),
    "NORMALIZE": lambda self: self._parse_normalize(),
    "OPENJSON": lambda self: self._parse_open_json(),
    "OVERLAY": lambda self: self._parse_overlay(),
    "POSITION": lambda self: self._parse_position(),
    "PREDICT": lambda self: self._parse_predict(),
    "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
    "STRING_AGG": lambda self: self._parse_string_agg(),
    "SUBSTRING": lambda self: self._parse_substring(),
    "TRIM": lambda self: self._parse_trim(),
    "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
    "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    "XMLELEMENT": lambda self: self.expression(
        exp.XMLElement,
        this=self._match_text_seq("NAME") and self._parse_id_var(),
        expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression),
    ),
    "XMLTABLE": lambda self: self._parse_xml_table(),
}

# Token type -> callable returning a `(arg_name, parsed_expression)` pair for a query
# modifier. Several tokens intentionally map to the same arg name (LIMIT / FETCH,
# FOR / LOCK, TABLE_SAMPLE / USING, CONNECT_BY / START_WITH).
QUERY_MODIFIER_PARSERS = {
    TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
    TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
    TokenType.WHERE: lambda self: ("where", self._parse_where()),
    TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
    TokenType.HAVING: lambda self: ("having", self._parse_having()),
    TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
    TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
    TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
    TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
    TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
    TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
    TokenType.FOR: lambda self: ("locks", self._parse_locks()),
    TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
    TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
    TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
    TokenType.CLUSTER_BY: lambda self: (
        "cluster",
        self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
    ),
    TokenType.DISTRIBUTE_BY: lambda self: (
        "distribute",
        self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
    ),
    TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
    TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
    TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
}

SET_PARSERS = {
    "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
    "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
    "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
    "TRANSACTION": lambda self: self._parse_set_transaction(),
}

SHOW_PARSERS: t.Dict[str, t.Callable] = {}

TYPE_LITERAL_PARSERS = {
    exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
}

TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
    "ISOLATION": (
        ("LEVEL", "REPEATABLE", "READ"),
        ("LEVEL", "READ", "COMMITTED"),
        # The SQL keyword is UNCOMMITTED (double "M"); without this entry the
        # READ UNCOMMITTED isolation level can never be matched.
        ("LEVEL", "READ", "UNCOMMITTED"),
        # Legacy misspelling, kept so that anything relying on it keeps working.
        ("LEVEL", "READ", "UNCOMITTED"),
        ("LEVEL", "SERIALIZABLE"),
    ),
    "READ": ("WRITE", "ONLY"),
}

CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
    ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
)
CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

CREATE_SEQUENCE: OPTIONS_TYPE = {
    "SCALE": ("EXTEND", "NOEXTEND"),
    "SHARD": ("EXTEND", "NOEXTEND"),
    "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
    **dict.fromkeys(
        (
            "SESSION",
            "GLOBAL",
            "KEEP",
            "NOKEEP",
            "ORDER",
            "NOORDER",
            "NOCACHE",
            "CYCLE",
            "NOCYCLE",
            "NOMINVALUE",
            "NOMAXVALUE",
            "NOSCALE",
            "NOSHARD",
        ),
        tuple(),
    ),
}

ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

USABLES: OPTIONS_TYPE = dict.fromkeys(
    ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
)

CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
    "TYPE": ("EVOLUTION",),
    **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
}

PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
    "NOT": ("ENFORCED",),
    "MATCH": (
        "FULL",
        "PARTIAL",
        "SIMPLE",
    ),
    "INITIALLY": ("DEFERRED", "IMMEDIATE"),
    "USING": (
        "BTREE",
        "HASH",
    ),
    **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()),
}

INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

CLONE_KEYWORDS = {"CLONE", "COPY"}
HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

ADD_CONSTRAINT_TOKENS = {
    TokenType.CONSTRAINT,
    TokenType.FOREIGN_KEY,
    TokenType.INDEX,
    TokenType.KEY,
    TokenType.PRIMARY_KEY,
    TokenType.UNIQUE,
}

DISTINCT_TOKENS = {TokenType.DISTINCT}

NULL_TOKENS = {TokenType.NULL}

UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS

SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

# ODBC escape-sequence prefix -> expression class built for that literal kind.
ODBC_DATETIME_LITERALS = {
    "d": exp.Date,
    "t": exp.Time,
    "ts": exp.Timestamp,
}

ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}
PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

# The style options for the DESCRIBE statement
DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

# The style options for the ANALYZE statement
ANALYZE_STYLES = {
    "BUFFER_USAGE_LIMIT",
    "FULL",
    "LOCAL",
    "NO_WRITE_TO_BINLOG",
    "SAMPLE",
    "SKIP_LOCKED",
    "VERBOSE",
}

ANALYZE_EXPRESSION_PARSERS = {
    "ALL": lambda self: self._parse_analyze_columns(),
    "COMPUTE": lambda self: self._parse_analyze_statistics(),
    "DELETE": lambda self: self._parse_analyze_delete(),
    "DROP": lambda self: self._parse_analyze_histogram(),
    "ESTIMATE": lambda self: self._parse_analyze_statistics(),
    "LIST": lambda self: self._parse_analyze_list(),
    "PREDICATE": lambda self: self._parse_analyze_columns(),
    "UPDATE": lambda self: self._parse_analyze_histogram(),
    "VALIDATE": lambda self: self._parse_analyze_validate(),
}

PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}

AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

OPERATION_MODIFIERS: t.Set[str] = set()

RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"}

MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows)

STRICT_CAST = True

PREFIXED_PIVOT_COLUMNS = False
IDENTIFY_PIVOT_STRINGS = False

LOG_DEFAULTS_TO_LN = False

# Whether ADD is present for each column added by ALTER TABLE
ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

# Whether the table sample clause expects CSV syntax
TABLESAMPLE_CSV = False

# The default method used for table sampling
DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

# Whether the SET command needs a delimiter (e.g. "=") for assignments
SET_REQUIRES_ASSIGNMENT_DELIMITER = True

# Whether the TRIM function expects the characters to trim as its first argument
TRIM_PATTERN_FIRST = False

# Whether string aliases are supported `SELECT COUNT(*) 'count'`
STRING_ALIASES = False

# Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
MODIFIERS_ATTACHED_TO_SET_OP = True
SET_OP_MODIFIERS = {"order", "limit", "offset"}

# Whether to parse IF statements that aren't followed by a left parenthesis as commands
NO_PAREN_IF_COMMANDS = True

# Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
JSON_ARROWS_REQUIRE_JSON_TYPE = False

# Whether the `:` operator is used to extract a value from a VARIANT column
COLON_IS_VARIANT_EXTRACT = False

# Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
# If this is True and '(' is not found, the keyword will be treated as an identifier
VALUES_FOLLOWED_BY_PAREN = True

# Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
SUPPORTS_IMPLICIT_UNNEST = False

# Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
INTERVAL_SPANS = True

# Whether a PARTITION clause can follow a table reference
SUPPORTS_PARTITION_SELECTION = False

# Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

# Whether the 'AS' keyword is optional in the CTE definition syntax
OPTIONAL_ALIAS_TOKEN_CTE = True

__slots__ = (
    "error_level",
    "error_message_context",
    "max_errors",
    "dialect",
    "sql",
    "errors",
    "_tokens",
    "_index",
    "_curr",
    "_next",
    "_prev",
    "_prev_comments",
)

# Autofilled
SHOW_TRIE: t.Dict = {}
SET_TRIE: t.Dict = {}

def __init__(
    self,
    error_level: t.Optional[ErrorLevel] = None,
    error_message_context: int = 100,
    max_errors: int = 3,
    dialect: DialectType = None,
):
    """
    Creates a parser and resets its token state.

    Args:
        error_level: The desired error level; falls back to ErrorLevel.IMMEDIATE when not given.
        error_message_context: The number of characters of SQL shown on each side of the
            offending token in error messages.
        max_errors: Passed to `concat_messages` when the collected errors are raised.
        dialect: The dialect to resolve through `Dialect.get_or_raise`.
    """
    # Imported here rather than at module level; the module only imports Dialect
    # under TYPE_CHECKING.
    from sqlglot.dialects import Dialect

    self.error_level = error_level or ErrorLevel.IMMEDIATE
    self.error_message_context = error_message_context
    self.max_errors = max_errors
    self.dialect = Dialect.get_or_raise(dialect)
    self.reset()

def reset(self):
    """Clears the SQL text, the recorded errors and the token cursor state."""
    self.sql = ""
    self.errors = []
    self._tokens = []
    self._index = 0
    self._curr = None
    self._next = None
    self._prev = None
    self._prev_comments = None

def parse(
    self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens and returns a list of syntax trees, one tree
    per parsed SQL statement.

    Args:
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The list of the produced syntax trees.
    """
    return self._parse(
        parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
    )

def parse_into(
    self,
    expression_types: exp.IntoType,
    raw_tokens: t.List[Token],
    sql: t.Optional[str] = None,
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens into a given Expression type. If a collection of Expression
    types is given instead, this method will try to parse the token list into each one
    of them, stopping at the first for which the parsing succeeds.

    Args:
        expression_types: The expression type(s) to try and parse the token list into.
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The target Expression.

    Raises:
        TypeError: If no parser is registered for one of the expression types.
        ParseError: If the tokens could not be parsed into any of the expression types.
    """
    errors = []
    for expression_type in ensure_list(expression_types):
        parser = self.EXPRESSION_PARSERS.get(expression_type)
        if not parser:
            raise TypeError(f"No parser registered for {expression_type}")

        try:
            return self._parse(parser, raw_tokens, sql)
        except ParseError as e:
            e.errors[0]["into_expression"] = expression_type
            errors.append(e)

    # `errors` is empty when no expression type was supplied; indexing it blindly would
    # replace the ParseError below with an unrelated IndexError.
    cause = errors[-1] if errors else None

    raise ParseError(
        f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
        errors=merge_errors(errors),
    ) from cause

def _parse(
    self,
    parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
    raw_tokens: t.List[Token],
    sql: t.Optional[str] = None,
) -> t.List[t.Optional[exp.Expression]]:
    """
    Splits the tokens into semicolon-separated chunks and applies `parse_method` to each one.

    Args:
        parse_method: The unbound parsing function to call with this parser for each chunk.
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        One parse result per chunk.
    """
    self.reset()
    self.sql = sql or ""

    total = len(raw_tokens)
    chunks: t.List[t.List[Token]] = [[]]

    for i, token in enumerate(raw_tokens):
        if token.token_type == TokenType.SEMICOLON:
            # A semicolon that carries comments becomes a chunk of its own so that
            # the comments are not dropped
            if token.comments:
                chunks.append([token])

            # A trailing semicolon does not open a new (empty) chunk
            if i < total - 1:
                chunks.append([])
        else:
            chunks[-1].append(token)

    expressions = []

    for tokens in chunks:
        # Start one position before the first token; _advance moves onto it
        self._index = -1
        self._tokens = tokens
        self._advance()

        expressions.append(parse_method(self))

        # Leftover tokens mean that the chunk was not fully consumed
        if self._index < len(self._tokens):
            self.raise_error("Invalid expression / Unexpected token")

        self.check_errors()

    return expressions

def check_errors(self) -> None:
    """Logs or raises any found errors, depending on the chosen error level setting."""
    if self.error_level == ErrorLevel.WARN:
        for error in self.errors:
            logger.error(str(error))
    elif self.error_level == ErrorLevel.RAISE and self.errors:
        raise ParseError(
            concat_messages(self.errors, self.max_errors),
            errors=merge_errors(self.errors),
        )

def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
    """
    Appends an error in the list of recorded errors or raises it, depending on the chosen
    error level setting.

    Args:
        message: The description of the error.
        token: The token the error refers to; defaults to the current token, then the
            previous one, then an empty string token.
    """
    token = token or self._curr or self._prev or Token.string("")
    start = token.start
    end = token.end + 1
    start_context = self.sql[max(start - self.error_message_context, 0) : start]
    highlight = self.sql[start:end]
    end_context = self.sql[end : end + self.error_message_context]

    # The ANSI escape codes underline the offending part of the SQL
    error = ParseError.new(
        f"{message}. Line {token.line}, Col: {token.col}.\n"
        f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
        description=message,
        line=token.line,
        col=token.col,
        start_context=start_context,
        highlight=highlight,
        end_context=end_context,
    )

    if self.error_level == ErrorLevel.IMMEDIATE:
        raise error

    self.errors.append(error)

def expression(
    self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
) -> E:
    """
    Creates a new, validated Expression.

    Args:
        exp_class: The expression class to instantiate.
        comments: An optional list of comments to attach to the expression.
        kwargs: The arguments to set for the expression along with their respective values.

    Returns:
        The target expression.
    """
    instance = exp_class(**kwargs)
    # Explicit comments win; otherwise the pending comments of the previous token are attached
    instance.add_comments(comments) if comments else self._add_comments(instance)
    return self.validate_expression(instance)

def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
    """Moves the pending comments of the previous token onto `expression`, if there are any."""
    if expression and self._prev_comments:
        expression.add_comments(self._prev_comments)
        self._prev_comments = None

def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
    """
    Validates an Expression, making sure that all its mandatory arguments are set.

    Args:
        expression: The expression to validate.
        args: An optional list of items that was used to instantiate the expression, if it's a Func.

    Returns:
        The validated expression.
    """
    if self.error_level != ErrorLevel.IGNORE:
        for error_message in expression.error_messages(args):
            self.raise_error(error_message)

    return expression

def _find_sql(self, start: Token, end: Token) -> str:
    """Returns the slice of the SQL text that spans from `start` to `end`, both inclusive."""
    return self.sql[start.start : end.end + 1]

def _is_connected(self) -> bool:
    """Returns whether the current token starts right where the previous token ends."""
    # bool() honours the annotated return type; the bare `and` chain could yield None
    return bool(self._prev and self._curr and self._prev.end + 1 == self._curr.start)

def _advance(self, times: int = 1) -> None:
    """
    Moves the token cursor by `times` positions and refreshes the current, next and
    previous tokens, as well as the comments of the previous token.

    Args:
        times: The number of positions to move by; negative when called through `_retreat`
            with an earlier index.
    """
    self._index += times
    self._curr = seq_get(self._tokens, self._index)
    self._next = seq_get(self._tokens, self._index + 1)

    if self._index > 0:
        self._prev = self._tokens[self._index - 1]
        self._prev_comments = self._prev.comments
    else:
        self._prev = None
        self._prev_comments = None

def _retreat(self, index: int) -> None:
    """Moves the token cursor to the absolute position `index`."""
    if index != self._index:
        self._advance(index - self._index)

def _warn_unsupported(self) -> None:
    """Logs a warning saying that the chunk being processed falls back to a 'Command'."""
    if len(self._tokens) <= 1:
        return

    # We use _find_sql because self.sql may comprise multiple chunks, and we're only
    # interested in emitting a warning for the one being currently processed.
    sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

    logger.warning(
        f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
    )

def _parse_command(self) -> exp.Command:
    """Builds a Command from the previous token's upper-cased text and the string that follows."""
    self._warn_unsupported()
    return self.expression(
        exp.Command,
        comments=self._prev_comments,
        this=self._prev.text.upper(),
        expression=self._parse_string(),
    )

def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
    """
    Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
    This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
    solve this by setting & resetting the parser state accordingly

    Args:
        parse_method: The parsing function to try.
        retreat: Whether to restore the token position even when parsing succeeds.

    Returns:
        The result of `parse_method`, or None if it raised a ParseError.
    """
    index = self._index
    error_level = self.error_level

    # Bound up front so that the `finally` clause can always read it; otherwise an
    # exception other than ParseError would be masked by an UnboundLocalError.
    this: t.Optional[T] = None

    self.error_level = ErrorLevel.IMMEDIATE
    try:
        this = parse_method()
    except ParseError:
        this = None
    finally:
        if not this or retreat:
            self._retreat(index)
        self.error_level = error_level

    return this

def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
    """
    Parses a COMMENT statement into an `exp.Comment`.

    Args:
        allow_exists: Whether to look for an IF EXISTS clause.

    Returns:
        The Comment expression, or the result of `_parse_as_command` when no creatable
        kind follows.
    """
    start = self._prev
    exists = self._parse_exists() if allow_exists else None

    self._match(TokenType.ON)

    materialized = self._match_text_seq("MATERIALIZED")
    kind = self._match_set(self.CREATABLES) and self._prev
    if not kind:
        return self._parse_as_command(start)

    if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
        this = self._parse_user_defined_function(kind=kind.token_type)
    elif kind.token_type == TokenType.TABLE:
        this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
    elif kind.token_type == TokenType.COLUMN:
        this = self._parse_column()
    else:
        this = self._parse_id_var()

    self._match(TokenType.IS)

    return self.expression(
        exp.Comment,
        this=this,
        kind=kind.text,
        expression=self._parse_string(),
        exists=exists,
        materialized=materialized,
    )

def _parse_to_table(
    self,
) -> exp.ToTableProperty:
    """Parses the table reference of a `TO <table>` property."""
    table = self._parse_table_parts(schema=True)
    return self.expression(exp.ToTableProperty, this=table)

# https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
def _parse_ttl(self) -> exp.Expression:
    """
    Parses a MergeTree TTL clause: a comma-separated list of TTL expressions, each with an
    optional action, followed by optional WHERE and GROUP BY ... SET parts.
    """

    def _parse_ttl_action() -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()

        if self._match_text_seq("DELETE"):
            return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
        if self._match_text_seq("RECOMPRESS"):
            return self.expression(
                exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
            )
        if self._match_text_seq("TO", "DISK"):
            return self.expression(
                exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
            )
        if self._match_text_seq("TO", "VOLUME"):
            return self.expression(
                exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
            )

        # No action keyword: the bare expression is the TTL entry
        return this

    expressions = self._parse_csv(_parse_ttl_action)
    where = self._parse_where()
    group = self._parse_group()

    # SET is only looked for when a GROUP BY was parsed
    aggregates = None
    if group and self._match(TokenType.SET):
        aggregates = self._parse_csv(self._parse_set_item)

    return self.expression(
        exp.MergeTreeTTL,
        expressions=expressions,
        where=where,
        group=group,
        aggregates=aggregates,
    )

def _parse_statement(self) -> t.Optional[exp.Expression]:
    """
    Parses a single statement.

    Registered statement parsers are tried first, then the dialect tokenizer's commands,
    and finally the tokens are parsed as an expression or a SELECT with query modifiers.

    Returns:
        The parsed statement, or None if there are no tokens left.
    """
    if self._curr is None:
        return None

    if self._match_set(self.STATEMENT_PARSERS):
        # Captured before the statement parser runs and moves the cursor
        comments = self._prev_comments
        stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
        stmt.add_comments(comments, prepend=True)
        return stmt

    if self._match_set(self.dialect.tokenizer.COMMANDS):
        return self._parse_command()

    expression = self._parse_expression()
    expression = self._parse_set_operations(expression) if expression else self._parse_select()
    return self._parse_query_modifiers(expression)

def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
    """
    Parses a DROP statement.

    Args:
        exists: Whether an IF EXISTS clause is already known to be present.

    Returns:
        The Drop expression, or the result of `_parse_as_command` when no creatable
        kind follows.
    """
    start = self._prev
    temporary = self._match(TokenType.TEMPORARY)
    materialized = self._match_text_seq("MATERIALIZED")

    kind_token = self._match_set(self.CREATABLES) and self._prev
    if not kind_token:
        return self._parse_as_command(start)

    kind = kind_token.text.upper()

    concurrently = self._match_text_seq("CONCURRENTLY")
    if_exists = exists or self._parse_exists()

    if kind == "COLUMN":
        this = self._parse_column()
    else:
        # The kind token is inspected instead of self._prev: once CONCURRENTLY or
        # IF EXISTS has been consumed, self._prev no longer points at the kind and a
        # `DROP SCHEMA IF EXISTS ...` would not be parsed as a db reference.
        this = self._parse_table_parts(
            schema=True, is_db_reference=kind_token.token_type == TokenType.SCHEMA
        )

    cluster = self._parse_on_property() if self._match(TokenType.ON) else None

    if self._match(TokenType.L_PAREN, advance=False):
        expressions = self._parse_wrapped_csv(self._parse_types)
    else:
        expressions = None

    return self.expression(
        exp.Drop,
        exists=if_exists,
        this=this,
        expressions=expressions,
        kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
        temporary=temporary,
        materialized=materialized,
        cascade=self._match_text_seq("CASCADE"),
        constraints=self._match_text_seq("CONSTRAINTS"),
        purge=self._match_text_seq("PURGE"),
        cluster=cluster,
        concurrently=concurrently,
    )

def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
    """
    Matches an `IF EXISTS` clause, or `IF NOT EXISTS` when `not_` is set.

    Returns:
        A truthy value if the whole clause was matched, a falsy one otherwise.
    """
    return (
        self._match_text_seq("IF")
        and (not not_ or self._match(TokenType.NOT))
        and self._match(TokenType.EXISTS)
    )

def _parse_create(self) -> exp.Create | exp.Command:
    # Note: this can't be None because we've matched a statement parser
    start = self._prev

    replace = (
        start.token_type == TokenType.REPLACE
        or self._match_pair(TokenType.OR, TokenType.REPLACE)
        or self._match_pair(TokenType.OR, TokenType.ALTER)
    )
    refresh = self._match_pair(TokenType.OR, TokenType.REFRESH)

    unique = self._match(TokenType.UNIQUE)

    if self._match_text_seq("CLUSTERED", "COLUMNSTORE"):
        clustered = True
    elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq(
        "COLUMNSTORE"
    ):
        clustered = False
    else:
        clustered = None

    if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
        self._advance()

    properties = None
    create_token = self._match_set(self.CREATABLES) and self._prev

    if not create_token:
        # exp.Properties.Location.POST_CREATE
        properties = self._parse_properties()
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

    concurrently = self._match_text_seq("CONCURRENTLY")
    exists = self._parse_exists(not_=True)
    this = None
    expression: t.Optional[exp.Expression] = None
    indexes = None
    no_schema_binding = None
    begin = None
    end = None
    clone = None

    def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
        nonlocal properties
        if properties and temp_props:
            properties.expressions.extend(temp_props.expressions)
        elif temp_props:
            properties = temp_props

    if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
        this = self._parse_user_defined_function(kind=create_token.token_type)

        # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
        extend_props(self._parse_properties())

        expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
        extend_props(self._parse_properties())

        if not expression:
            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")

                if self._match(TokenType.STRING, advance=False):
                    # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                    # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                    expression = self._parse_string()
                    extend_props(self._parse_properties())
                else:
                    expression = self._parse_user_defined_function_expression()

                end = self._match_text_seq("END")

                if return_:
                    expression = self.expression(exp.Return, this=expression)
    elif create_token.token_type == TokenType.INDEX:
        # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c)
        if not self._match(TokenType.ON):
            index = self._parse_id_var()
            anonymous = False
        else:
            index = None
            anonymous = True

        this = self._parse_index(index=index, anonymous=anonymous)
    elif create_token.token_type in self.DB_CREATABLES:
        table_parts = self._parse_table_parts(
            schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
        )

        # exp.Properties.Location.POST_NAME
        self._match(TokenType.COMMA)
        extend_props(self._parse_properties(before=True))

        this = self._parse_schema(this=table_parts)

        # exp.Properties.Location.POST_SCHEMA and POST_WITH
        extend_props(self._parse_properties())

        has_alias = self._match(TokenType.ALIAS)
        if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
            # exp.Properties.Location.POST_ALIAS
            extend_props(self._parse_properties())

        if create_token.token_type == TokenType.SEQUENCE:
            expression = self._parse_types()
            extend_props(self._parse_properties())
def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
    """Collect consecutive sequence options into one `exp.SequenceProperties` node.

    INCREMENT [BY] [=], MINVALUE, MAXVALUE, START [=] (or a START_WITH token),
    CACHE and OWNED BY are handled explicitly. Anything else is looked up in
    `CREATE_SEQUENCE` and gathered under "options". Parsing stops at the first
    token that is none of these; a comma before each option is skipped.

    Returns:
        The populated node, or None when not a single token was consumed.
    """
    start_index = self._index
    sequence = exp.SequenceProperties()
    extra_options = []

    while self._curr:
        self._match(TokenType.COMMA)

        if self._match_text_seq("INCREMENT"):
            self._match_text_seq("BY")
            self._match_text_seq("=")
            sequence.set("increment", self._parse_term())
            continue

        if self._match_text_seq("MINVALUE"):
            sequence.set("minvalue", self._parse_term())
            continue

        if self._match_text_seq("MAXVALUE"):
            sequence.set("maxvalue", self._parse_term())
            continue

        if self._match(TokenType.START_WITH) or self._match_text_seq("START"):
            self._match_text_seq("=")
            sequence.set("start", self._parse_term())
            continue

        if self._match_text_seq("CACHE"):
            # T-SQL allows an empty CACHE, which is initialized dynamically
            sequence.set("cache", self._parse_number() or True)
            continue

        if self._match_text_seq("OWNED", "BY"):
            # "OWNED BY NONE" is the default, so it is stored as None
            owner = None if self._match_text_seq("NONE") else self._parse_column()
            sequence.set("owned", owner)
            continue

        option = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
        if not option:
            break
        extra_options.append(option)

    sequence.set("options", extra_options or None)

    if self._index == start_index:
        return None
    return sequence
v for k, v in kwargs.items() if v}) 2137 except TypeError: 2138 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2139 2140 return None 2141 2142 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2143 return self._parse_wrapped_csv(self._parse_property) 2144 2145 def _parse_property(self) -> t.Optional[exp.Expression]: 2146 if self._match_texts(self.PROPERTY_PARSERS): 2147 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2148 2149 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2150 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2151 2152 if self._match_text_seq("COMPOUND", "SORTKEY"): 2153 return self._parse_sortkey(compound=True) 2154 2155 if self._match_text_seq("SQL", "SECURITY"): 2156 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2157 2158 index = self._index 2159 key = self._parse_column() 2160 2161 if not self._match(TokenType.EQ): 2162 self._retreat(index) 2163 return self._parse_sequence_properties() 2164 2165 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2166 if isinstance(key, exp.Column): 2167 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2168 2169 value = self._parse_bitwise() or self._parse_var(any_token=True) 2170 2171 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2172 if isinstance(value, exp.Column): 2173 value = exp.var(value.name) 2174 2175 return self.expression(exp.Property, this=key, value=value) 2176 2177 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2178 if self._match_text_seq("BY"): 2179 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2180 2181 self._match(TokenType.ALIAS) 2182 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2183 output_format = self._parse_string() if 
self._match_text_seq("OUTPUTFORMAT") else None 2184 2185 return self.expression( 2186 exp.FileFormatProperty, 2187 this=( 2188 self.expression( 2189 exp.InputOutputFormat, 2190 input_format=input_format, 2191 output_format=output_format, 2192 ) 2193 if input_format or output_format 2194 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2195 ), 2196 ) 2197 2198 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2199 field = self._parse_field() 2200 if isinstance(field, exp.Identifier) and not field.quoted: 2201 field = exp.var(field) 2202 2203 return field 2204 2205 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2206 self._match(TokenType.EQ) 2207 self._match(TokenType.ALIAS) 2208 2209 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2210 2211 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2212 properties = [] 2213 while True: 2214 if before: 2215 prop = self._parse_property_before() 2216 else: 2217 prop = self._parse_property() 2218 if not prop: 2219 break 2220 for p in ensure_list(prop): 2221 properties.append(p) 2222 2223 if properties: 2224 return self.expression(exp.Properties, expressions=properties) 2225 2226 return None 2227 2228 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2229 return self.expression( 2230 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2231 ) 2232 2233 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2234 if self._match_texts(("DEFINER", "INVOKER")): 2235 security_specifier = self._prev.text.upper() 2236 return self.expression(exp.SecurityProperty, this=security_specifier) 2237 return None 2238 2239 def _parse_settings_property(self) -> exp.SettingsProperty: 2240 return self.expression( 2241 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2242 ) 2243 2244 def _parse_volatile_property(self) -> 
def _parse_system_versioning_property(
    self, with_: bool = False
) -> exp.WithSystemVersioningProperty:
    """Parse the value of a SYSTEM_VERSIONING property.

    Accepts an optional ``=``, then OFF, or an optional ON followed by an
    optional parenthesized option list. The recognized options are
    HISTORY_TABLE, DATA_CONSISTENCY_CHECK and HISTORY_RETENTION_PERIOD, each
    followed by ``=``.

    Args:
        with_: Stored under the node's "with" arg.

    Returns:
        The populated `exp.WithSystemVersioningProperty` node.
    """
    self._match(TokenType.EQ)
    prop = self.expression(
        exp.WithSystemVersioningProperty,
        **{  # type: ignore
            "on": True,
            "with": with_,
        },
    )

    if self._match_text_seq("OFF"):
        prop.set("on", False)
        return prop

    self._match(TokenType.ON)
    if self._match(TokenType.L_PAREN):
        while self._curr and not self._match(TokenType.R_PAREN):
            index = self._index

            if self._match_text_seq("HISTORY_TABLE", "="):
                prop.set("this", self._parse_table_parts())
            elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
            elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                prop.set("retention_period", self._parse_retention_period())

            self._match(TokenType.COMMA)

            if self._index == index:
                # Nothing was consumed, i.e. the current token is neither a known option,
                # a comma nor the closing paren. Without this guard the loop never ends.
                self.raise_error("Unexpected option in SYSTEM_VERSIONING clause")
                break

    return prop
def _parse_distributed_property(self) -> exp.DistributedByProperty:
    """Parse a DISTRIBUTED property: [BY HASH (...) | BY RANDOM] [BUCKETS {AUTO | n}] [order].

    The kind is "HASH" unless BY RANDOM is matched. `buckets` stays None both
    when BUCKETS is missing and when it is followed by AUTO.
    """
    expressions: t.Optional[t.List[exp.Expression]] = None

    if self._match_text_seq("BY", "HASH"):
        kind = "HASH"
        expressions = self._parse_wrapped_csv(self._parse_id_var)
    elif self._match_text_seq("BY", "RANDOM"):
        kind = "RANDOM"
    else:
        kind = "HASH"

    # If the BUCKETS keyword is not present, the number of buckets is AUTO
    buckets: t.Optional[exp.Expression] = None
    has_explicit_buckets = self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO")
    if has_explicit_buckets:
        buckets = self._parse_number()

    order = self._parse_order()

    return self.expression(
        exp.DistributedByProperty,
        expressions=expressions,
        kind=kind,
        buckets=buckets,
        order=order,
    )
# https://dev.mysql.com/doc/refman/8.0/en/create-view.html
def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
    """Parse ``[=] user@host`` into an `exp.DefinerProperty`.

    The host is either an identifier or the text of a MOD token.

    Returns:
        The property, whose `this` is the string "<user>@<host>", or None when
        either part is missing.
    """
    self._match(TokenType.EQ)

    user = self._parse_id_var()
    self._match(TokenType.PARAMETER)

    host = self._parse_id_var()
    if not host and self._match(TokenType.MOD):
        host = self._prev.text

    if user and host:
        return exp.DefinerProperty(this=f"{user}@{host}")

    return None
_parse_checksum(self) -> exp.ChecksumProperty: 2408 self._match(TokenType.EQ) 2409 2410 on = None 2411 if self._match(TokenType.ON): 2412 on = True 2413 elif self._match_text_seq("OFF"): 2414 on = False 2415 2416 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2417 2418 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2419 return self.expression( 2420 exp.Cluster, 2421 expressions=( 2422 self._parse_wrapped_csv(self._parse_ordered) 2423 if wrapped 2424 else self._parse_csv(self._parse_ordered) 2425 ), 2426 ) 2427 2428 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2429 self._match_text_seq("BY") 2430 2431 self._match_l_paren() 2432 expressions = self._parse_csv(self._parse_column) 2433 self._match_r_paren() 2434 2435 if self._match_text_seq("SORTED", "BY"): 2436 self._match_l_paren() 2437 sorted_by = self._parse_csv(self._parse_ordered) 2438 self._match_r_paren() 2439 else: 2440 sorted_by = None 2441 2442 self._match(TokenType.INTO) 2443 buckets = self._parse_number() 2444 self._match_text_seq("BUCKETS") 2445 2446 return self.expression( 2447 exp.ClusteredByProperty, 2448 expressions=expressions, 2449 sorted_by=sorted_by, 2450 buckets=buckets, 2451 ) 2452 2453 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2454 if not self._match_text_seq("GRANTS"): 2455 self._retreat(self._index - 1) 2456 return None 2457 2458 return self.expression(exp.CopyGrantsProperty) 2459 2460 def _parse_freespace(self) -> exp.FreespaceProperty: 2461 self._match(TokenType.EQ) 2462 return self.expression( 2463 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2464 ) 2465 2466 def _parse_mergeblockratio( 2467 self, no: bool = False, default: bool = False 2468 ) -> exp.MergeBlockRatioProperty: 2469 if self._match(TokenType.EQ): 2470 return self.expression( 2471 exp.MergeBlockRatioProperty, 2472 this=self._parse_number(), 2473 percent=self._match(TokenType.PERCENT), 
def _parse_locking(self) -> exp.LockingProperty:
    """Parse the body of a LOCKING property into an `exp.LockingProperty`.

    Every component is optional and is tried in this order: the kind of object
    (TABLE, VIEW, ROW or DATABASE), the object's name (only for DATABASE, TABLE
    and VIEW), FOR or IN, the lock type, and OVERRIDE. Missing kind, name,
    FOR/IN and lock type are stored as None.
    """
    kind = None
    for token_type, label in (
        (TokenType.TABLE, "TABLE"),
        (TokenType.VIEW, "VIEW"),
        (TokenType.ROW, "ROW"),
    ):
        if self._match(token_type):
            kind = label
            break
    else:
        # DATABASE is matched by its text rather than by a token type
        if self._match_text_seq("DATABASE"):
            kind = "DATABASE"

    # No object name is parsed for ROW or when the kind is missing
    this = self._parse_table_parts() if kind in ("DATABASE", "TABLE", "VIEW") else None

    for_or_in = None
    if self._match(TokenType.FOR):
        for_or_in = "FOR"
    elif self._match(TokenType.IN):
        for_or_in = "IN"

    if self._match_text_seq("ACCESS"):
        lock_type = "ACCESS"
    elif self._match_texts(("EXCL", "EXCLUSIVE")):
        # Both spellings are normalized to EXCLUSIVE
        lock_type = "EXCLUSIVE"
    else:
        lock_type = next(
            (
                word
                for word in ("SHARE", "READ", "WRITE", "CHECKSUM")
                if self._match_text_seq(word)
            ),
            None,
        )

    override = self._match_text_seq("OVERRIDE")

    return self.expression(
        exp.LockingProperty,
        this=this,
        kind=kind,
        for_or_in=for_or_in,
        lock_type=lock_type,
        override=override,
    )
self._parse_wrapped_csv(_parse_partition_bound_expr) 2608 elif self._match_text_seq("WITH", "(", "MODULUS"): 2609 this = self._parse_number() 2610 self._match_text_seq(",", "REMAINDER") 2611 expression = self._parse_number() 2612 self._match_r_paren() 2613 else: 2614 self.raise_error("Failed to parse partition bound spec.") 2615 2616 return self.expression( 2617 exp.PartitionBoundSpec, 2618 this=this, 2619 expression=expression, 2620 from_expressions=from_expressions, 2621 to_expressions=to_expressions, 2622 ) 2623 2624 # https://www.postgresql.org/docs/current/sql-createtable.html 2625 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2626 if not self._match_text_seq("OF"): 2627 self._retreat(self._index - 1) 2628 return None 2629 2630 this = self._parse_table(schema=True) 2631 2632 if self._match(TokenType.DEFAULT): 2633 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2634 elif self._match_text_seq("FOR", "VALUES"): 2635 expression = self._parse_partition_bound_spec() 2636 else: 2637 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2638 2639 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2640 2641 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2642 self._match(TokenType.EQ) 2643 return self.expression( 2644 exp.PartitionedByProperty, 2645 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2646 ) 2647 2648 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2649 if self._match_text_seq("AND", "STATISTICS"): 2650 statistics = True 2651 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2652 statistics = False 2653 else: 2654 statistics = None 2655 2656 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2657 2658 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2659 if self._match_text_seq("SQL"): 2660 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS 
SQL") 2661 return None 2662 2663 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2664 if self._match_text_seq("SQL", "DATA"): 2665 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2666 return None 2667 2668 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2669 if self._match_text_seq("PRIMARY", "INDEX"): 2670 return exp.NoPrimaryIndexProperty() 2671 if self._match_text_seq("SQL"): 2672 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2673 return None 2674 2675 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2676 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2677 return exp.OnCommitProperty() 2678 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2679 return exp.OnCommitProperty(delete=True) 2680 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2681 2682 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2683 if self._match_text_seq("SQL", "DATA"): 2684 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2685 return None 2686 2687 def _parse_distkey(self) -> exp.DistKeyProperty: 2688 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2689 2690 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2691 table = self._parse_table(schema=True) 2692 2693 options = [] 2694 while self._match_texts(("INCLUDING", "EXCLUDING")): 2695 this = self._prev.text.upper() 2696 2697 id_var = self._parse_id_var() 2698 if not id_var: 2699 return None 2700 2701 options.append( 2702 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2703 ) 2704 2705 return self.expression(exp.LikeProperty, this=table, expressions=options) 2706 2707 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2708 return self.expression( 2709 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2710 ) 2711 2712 def 
def _parse_returns(self) -> exp.ReturnsProperty:
    """Parse what follows RETURNS into an `exp.ReturnsProperty`.

    Handled forms:
      - TABLE < ... >: an `exp.Schema` named "TABLE" whose expressions are read
        with `_parse_struct_types`; a missing ``>`` is reported via `raise_error`.
      - TABLE followed by anything else: delegated to `_parse_schema`.
      - NULL ON NULL INPUT: no value, `null` is set to True.
      - anything else: delegated to `_parse_types`.
    """
    is_table = self._match(TokenType.TABLE)
    null = None
    value: t.Optional[exp.Expression] = None

    if not is_table:
        if self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
        else:
            value = self._parse_types()
    elif self._match(TokenType.LT):
        columns = self._parse_csv(self._parse_struct_types)
        value = self.expression(exp.Schema, this="TABLE", expressions=columns)
        if not self._match(TokenType.GT):
            self.raise_error("Expecting >")
    else:
        value = self._parse_schema(exp.var("TABLE"))

    return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)
def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts:
    """Parse the branches and source of a multi-table INSERT.

    The upper-cased text of the previously consumed token becomes the node's
    `kind`. Branches of the form ``[WHEN <cond> [THEN]] [ELSE] INTO <table>
    [<values>]`` are read until one lacks INTO, after which the source table
    is parsed.

    Args:
        comments: Comments to attach to the resulting node.
    """
    kind = self._prev.text.upper()

    def parse_branch() -> t.Optional[exp.ConditionalInsert]:
        # One branch; None signals that no further INTO clause follows
        condition = None
        if self._match(TokenType.WHEN):
            condition = self._parse_disjunction()
            self._match(TokenType.THEN)

        is_else = self._match(TokenType.ELSE)

        if not self._match(TokenType.INTO):
            return None

        target = self._parse_table(schema=True)
        values = self._parse_derived_table_values()
        insert = self.expression(exp.Insert, this=target, expression=values)

        return self.expression(
            exp.ConditionalInsert,
            this=insert,
            expression=condition,
            else_=is_else,
        )

    branches = []
    while True:
        branch = parse_branch()
        if branch is None:
            break
        branches.append(branch)

    source = self._parse_table()

    return self.expression(
        exp.MultitableInserts,
        kind=kind,
        comments=comments,
        expressions=branches,
        source=source,
    )
self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2838 2839 self._match(TokenType.INTO) 2840 comments += ensure_list(self._prev_comments) 2841 self._match(TokenType.TABLE) 2842 is_function = self._match(TokenType.FUNCTION) 2843 2844 this = ( 2845 self._parse_table(schema=True, parse_partition=True) 2846 if not is_function 2847 else self._parse_function() 2848 ) 2849 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2850 this.set("alias", self._parse_table_alias()) 2851 2852 returning = self._parse_returning() 2853 2854 return self.expression( 2855 exp.Insert, 2856 comments=comments, 2857 hint=hint, 2858 is_function=is_function, 2859 this=this, 2860 stored=self._match_text_seq("STORED") and self._parse_stored(), 2861 by_name=self._match_text_seq("BY", "NAME"), 2862 exists=self._parse_exists(), 2863 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2864 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2865 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2866 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2867 conflict=self._parse_on_conflict(), 2868 returning=returning or self._parse_returning(), 2869 overwrite=overwrite, 2870 alternative=alternative, 2871 ignore=ignore, 2872 source=self._match(TokenType.TABLE) and self._parse_table(), 2873 ) 2874 2875 def _parse_kill(self) -> exp.Kill: 2876 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2877 2878 return self.expression( 2879 exp.Kill, 2880 this=self._parse_primary(), 2881 kind=kind, 2882 ) 2883 2884 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2885 conflict = self._match_text_seq("ON", "CONFLICT") 2886 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2887 2888 if not conflict and not duplicate: 2889 return None 2890 2891 conflict_keys = None 2892 constraint = None 2893 2894 if 
conflict: 2895 if self._match_text_seq("ON", "CONSTRAINT"): 2896 constraint = self._parse_id_var() 2897 elif self._match(TokenType.L_PAREN): 2898 conflict_keys = self._parse_csv(self._parse_id_var) 2899 self._match_r_paren() 2900 2901 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2902 if self._prev.token_type == TokenType.UPDATE: 2903 self._match(TokenType.SET) 2904 expressions = self._parse_csv(self._parse_equality) 2905 else: 2906 expressions = None 2907 2908 return self.expression( 2909 exp.OnConflict, 2910 duplicate=duplicate, 2911 expressions=expressions, 2912 action=action, 2913 conflict_keys=conflict_keys, 2914 constraint=constraint, 2915 where=self._parse_where(), 2916 ) 2917 2918 def _parse_returning(self) -> t.Optional[exp.Returning]: 2919 if not self._match(TokenType.RETURNING): 2920 return None 2921 return self.expression( 2922 exp.Returning, 2923 expressions=self._parse_csv(self._parse_expression), 2924 into=self._match(TokenType.INTO) and self._parse_table_part(), 2925 ) 2926 2927 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2928 if not self._match(TokenType.FORMAT): 2929 return None 2930 return self._parse_row_format() 2931 2932 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2933 index = self._index 2934 with_ = with_ or self._match_text_seq("WITH") 2935 2936 if not self._match(TokenType.SERDE_PROPERTIES): 2937 self._retreat(index) 2938 return None 2939 return self.expression( 2940 exp.SerdeProperties, 2941 **{ # type: ignore 2942 "expressions": self._parse_wrapped_properties(), 2943 "with": with_, 2944 }, 2945 ) 2946 2947 def _parse_row_format( 2948 self, match_row: bool = False 2949 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2950 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2951 return None 2952 2953 if self._match_text_seq("SERDE"): 2954 this = self._parse_string() 2955 2956 
serde_properties = self._parse_serde_properties() 2957 2958 return self.expression( 2959 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2960 ) 2961 2962 self._match_text_seq("DELIMITED") 2963 2964 kwargs = {} 2965 2966 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2967 kwargs["fields"] = self._parse_string() 2968 if self._match_text_seq("ESCAPED", "BY"): 2969 kwargs["escaped"] = self._parse_string() 2970 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2971 kwargs["collection_items"] = self._parse_string() 2972 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2973 kwargs["map_keys"] = self._parse_string() 2974 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2975 kwargs["lines"] = self._parse_string() 2976 if self._match_text_seq("NULL", "DEFINED", "AS"): 2977 kwargs["null"] = self._parse_string() 2978 2979 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2980 2981 def _parse_load(self) -> exp.LoadData | exp.Command: 2982 if self._match_text_seq("DATA"): 2983 local = self._match_text_seq("LOCAL") 2984 self._match_text_seq("INPATH") 2985 inpath = self._parse_string() 2986 overwrite = self._match(TokenType.OVERWRITE) 2987 self._match_pair(TokenType.INTO, TokenType.TABLE) 2988 2989 return self.expression( 2990 exp.LoadData, 2991 this=self._parse_table(schema=True), 2992 local=local, 2993 overwrite=overwrite, 2994 inpath=inpath, 2995 partition=self._parse_partition(), 2996 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2997 serde=self._match_text_seq("SERDE") and self._parse_string(), 2998 ) 2999 return self._parse_as_command(self._prev) 3000 3001 def _parse_delete(self) -> exp.Delete: 3002 # This handles MySQL's "Multiple-Table Syntax" 3003 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3004 tables = None 3005 if not self._match(TokenType.FROM, advance=False): 3006 tables = self._parse_csv(self._parse_table) or None 3007 3008 
returning = self._parse_returning() 3009 3010 return self.expression( 3011 exp.Delete, 3012 tables=tables, 3013 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3014 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3015 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3016 where=self._parse_where(), 3017 returning=returning or self._parse_returning(), 3018 limit=self._parse_limit(), 3019 ) 3020 3021 def _parse_update(self) -> exp.Update: 3022 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 3023 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 3024 returning = self._parse_returning() 3025 return self.expression( 3026 exp.Update, 3027 **{ # type: ignore 3028 "this": this, 3029 "expressions": expressions, 3030 "from": self._parse_from(joins=True), 3031 "where": self._parse_where(), 3032 "returning": returning or self._parse_returning(), 3033 "order": self._parse_order(), 3034 "limit": self._parse_limit(), 3035 }, 3036 ) 3037 3038 def _parse_use(self) -> exp.Use: 3039 return self.expression( 3040 exp.Use, 3041 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3042 this=self._parse_table(schema=False), 3043 ) 3044 3045 def _parse_uncache(self) -> exp.Uncache: 3046 if not self._match(TokenType.TABLE): 3047 self.raise_error("Expecting TABLE after UNCACHE") 3048 3049 return self.expression( 3050 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3051 ) 3052 3053 def _parse_cache(self) -> exp.Cache: 3054 lazy = self._match_text_seq("LAZY") 3055 self._match(TokenType.TABLE) 3056 table = self._parse_table(schema=True) 3057 3058 options = [] 3059 if self._match_text_seq("OPTIONS"): 3060 self._match_l_paren() 3061 k = self._parse_string() 3062 self._match(TokenType.EQ) 3063 v = self._parse_string() 3064 options = [k, v] 3065 self._match_r_paren() 3066 3067 self._match(TokenType.ALIAS) 3068 return self.expression( 
3069 exp.Cache, 3070 this=table, 3071 lazy=lazy, 3072 options=options, 3073 expression=self._parse_select(nested=True), 3074 ) 3075 3076 def _parse_partition(self) -> t.Optional[exp.Partition]: 3077 if not self._match_texts(self.PARTITION_KEYWORDS): 3078 return None 3079 3080 return self.expression( 3081 exp.Partition, 3082 subpartition=self._prev.text.upper() == "SUBPARTITION", 3083 expressions=self._parse_wrapped_csv(self._parse_assignment), 3084 ) 3085 3086 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3087 def _parse_value_expression() -> t.Optional[exp.Expression]: 3088 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3089 return exp.var(self._prev.text.upper()) 3090 return self._parse_expression() 3091 3092 if self._match(TokenType.L_PAREN): 3093 expressions = self._parse_csv(_parse_value_expression) 3094 self._match_r_paren() 3095 return self.expression(exp.Tuple, expressions=expressions) 3096 3097 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
3098 expression = self._parse_expression() 3099 if expression: 3100 return self.expression(exp.Tuple, expressions=[expression]) 3101 return None 3102 3103 def _parse_projections(self) -> t.List[exp.Expression]: 3104 return self._parse_expressions() 3105 3106 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3107 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3108 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3109 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3110 ) 3111 elif self._match(TokenType.FROM): 3112 from_ = self._parse_from(skip_from_token=True) 3113 # Support parentheses for duckdb FROM-first syntax 3114 select = self._parse_select() 3115 if select: 3116 select.set("from", from_) 3117 this = select 3118 else: 3119 this = exp.select("*").from_(t.cast(exp.From, from_)) 3120 else: 3121 this = ( 3122 self._parse_table() 3123 if table 3124 else self._parse_select(nested=True, parse_set_operation=False) 3125 ) 3126 3127 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3128 # in case a modifier (e.g. 
join) is following 3129 if table and isinstance(this, exp.Values) and this.alias: 3130 alias = this.args["alias"].pop() 3131 this = exp.Table(this=this, alias=alias) 3132 3133 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3134 3135 return this 3136 3137 def _parse_select( 3138 self, 3139 nested: bool = False, 3140 table: bool = False, 3141 parse_subquery_alias: bool = True, 3142 parse_set_operation: bool = True, 3143 ) -> t.Optional[exp.Expression]: 3144 cte = self._parse_with() 3145 3146 if cte: 3147 this = self._parse_statement() 3148 3149 if not this: 3150 self.raise_error("Failed to parse any statement following CTE") 3151 return cte 3152 3153 if "with" in this.arg_types: 3154 this.set("with", cte) 3155 else: 3156 self.raise_error(f"{this.key} does not support CTE") 3157 this = cte 3158 3159 return this 3160 3161 # duckdb supports leading with FROM x 3162 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 3163 3164 if self._match(TokenType.SELECT): 3165 comments = self._prev_comments 3166 3167 hint = self._parse_hint() 3168 3169 if self._next and not self._next.token_type == TokenType.DOT: 3170 all_ = self._match(TokenType.ALL) 3171 distinct = self._match_set(self.DISTINCT_TOKENS) 3172 else: 3173 all_, distinct = None, None 3174 3175 kind = ( 3176 self._match(TokenType.ALIAS) 3177 and self._match_texts(("STRUCT", "VALUE")) 3178 and self._prev.text.upper() 3179 ) 3180 3181 if distinct: 3182 distinct = self.expression( 3183 exp.Distinct, 3184 on=self._parse_value(values=False) if self._match(TokenType.ON) else None, 3185 ) 3186 3187 if all_ and distinct: 3188 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3189 3190 operation_modifiers = [] 3191 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3192 operation_modifiers.append(exp.var(self._prev.text.upper())) 3193 3194 limit = self._parse_limit(top=True) 3195 projections = self._parse_projections() 3196 3197 this = 
self.expression( 3198 exp.Select, 3199 kind=kind, 3200 hint=hint, 3201 distinct=distinct, 3202 expressions=projections, 3203 limit=limit, 3204 operation_modifiers=operation_modifiers or None, 3205 ) 3206 this.comments = comments 3207 3208 into = self._parse_into() 3209 if into: 3210 this.set("into", into) 3211 3212 if not from_: 3213 from_ = self._parse_from() 3214 3215 if from_: 3216 this.set("from", from_) 3217 3218 this = self._parse_query_modifiers(this) 3219 elif (table or nested) and self._match(TokenType.L_PAREN): 3220 this = self._parse_wrapped_select(table=table) 3221 3222 # We return early here so that the UNION isn't attached to the subquery by the 3223 # following call to _parse_set_operations, but instead becomes the parent node 3224 self._match_r_paren() 3225 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3226 elif self._match(TokenType.VALUES, advance=False): 3227 this = self._parse_derived_table_values() 3228 elif from_: 3229 this = exp.select("*").from_(from_.this, copy=False) 3230 elif self._match(TokenType.SUMMARIZE): 3231 table = self._match(TokenType.TABLE) 3232 this = self._parse_select() or self._parse_string() or self._parse_table() 3233 return self.expression(exp.Summarize, this=this, table=table) 3234 elif self._match(TokenType.DESCRIBE): 3235 this = self._parse_describe() 3236 elif self._match_text_seq("STREAM"): 3237 this = self._parse_function() 3238 if this: 3239 this = self.expression(exp.Stream, this=this) 3240 else: 3241 self._retreat(self._index - 1) 3242 else: 3243 this = None 3244 3245 return self._parse_set_operations(this) if parse_set_operation else this 3246 3247 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3248 self._match_text_seq("SEARCH") 3249 3250 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3251 3252 if not kind: 3253 return None 3254 3255 self._match_text_seq("FIRST", "BY") 3256 3257 return self.expression( 3258 
exp.RecursiveWithSearch, 3259 kind=kind, 3260 this=self._parse_id_var(), 3261 expression=self._match_text_seq("SET") and self._parse_id_var(), 3262 using=self._match_text_seq("USING") and self._parse_id_var(), 3263 ) 3264 3265 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3266 if not skip_with_token and not self._match(TokenType.WITH): 3267 return None 3268 3269 comments = self._prev_comments 3270 recursive = self._match(TokenType.RECURSIVE) 3271 3272 last_comments = None 3273 expressions = [] 3274 while True: 3275 cte = self._parse_cte() 3276 if isinstance(cte, exp.CTE): 3277 expressions.append(cte) 3278 if last_comments: 3279 cte.add_comments(last_comments) 3280 3281 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3282 break 3283 else: 3284 self._match(TokenType.WITH) 3285 3286 last_comments = self._prev_comments 3287 3288 return self.expression( 3289 exp.With, 3290 comments=comments, 3291 expressions=expressions, 3292 recursive=recursive, 3293 search=self._parse_recursive_with_search(), 3294 ) 3295 3296 def _parse_cte(self) -> t.Optional[exp.CTE]: 3297 index = self._index 3298 3299 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3300 if not alias or not alias.this: 3301 self.raise_error("Expected CTE to have alias") 3302 3303 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3304 self._retreat(index) 3305 return None 3306 3307 comments = self._prev_comments 3308 3309 if self._match_text_seq("NOT", "MATERIALIZED"): 3310 materialized = False 3311 elif self._match_text_seq("MATERIALIZED"): 3312 materialized = True 3313 else: 3314 materialized = None 3315 3316 cte = self.expression( 3317 exp.CTE, 3318 this=self._parse_wrapped(self._parse_statement), 3319 alias=alias, 3320 materialized=materialized, 3321 comments=comments, 3322 ) 3323 3324 if isinstance(cte.this, exp.Values): 3325 cte.set("this", exp.select("*").from_(exp.alias_(cte.this, "_values", table=True))) 3326 3327 return 
cte 3328 3329 def _parse_table_alias( 3330 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3331 ) -> t.Optional[exp.TableAlias]: 3332 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3333 # so this section tries to parse the clause version and if it fails, it treats the token 3334 # as an identifier (alias) 3335 if self._can_parse_limit_or_offset(): 3336 return None 3337 3338 any_token = self._match(TokenType.ALIAS) 3339 alias = ( 3340 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3341 or self._parse_string_as_identifier() 3342 ) 3343 3344 index = self._index 3345 if self._match(TokenType.L_PAREN): 3346 columns = self._parse_csv(self._parse_function_parameter) 3347 self._match_r_paren() if columns else self._retreat(index) 3348 else: 3349 columns = None 3350 3351 if not alias and not columns: 3352 return None 3353 3354 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3355 3356 # We bubble up comments from the Identifier to the TableAlias 3357 if isinstance(alias, exp.Identifier): 3358 table_alias.add_comments(alias.pop_comments()) 3359 3360 return table_alias 3361 3362 def _parse_subquery( 3363 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3364 ) -> t.Optional[exp.Subquery]: 3365 if not this: 3366 return None 3367 3368 return self.expression( 3369 exp.Subquery, 3370 this=this, 3371 pivots=self._parse_pivots(), 3372 alias=self._parse_table_alias() if parse_alias else None, 3373 sample=self._parse_table_sample(), 3374 ) 3375 3376 def _implicit_unnests_to_explicit(self, this: E) -> E: 3377 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3378 3379 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3380 for i, join in enumerate(this.args.get("joins") or []): 3381 table = join.this 3382 normalized_table = table.copy() 3383 normalized_table.meta["maybe_column"] = True 
3384 normalized_table = _norm(normalized_table, dialect=self.dialect) 3385 3386 if isinstance(table, exp.Table) and not join.args.get("on"): 3387 if normalized_table.parts[0].name in refs: 3388 table_as_column = table.to_column() 3389 unnest = exp.Unnest(expressions=[table_as_column]) 3390 3391 # Table.to_column creates a parent Alias node that we want to convert to 3392 # a TableAlias and attach to the Unnest, so it matches the parser's output 3393 if isinstance(table.args.get("alias"), exp.TableAlias): 3394 table_as_column.replace(table_as_column.this) 3395 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3396 3397 table.replace(unnest) 3398 3399 refs.add(normalized_table.alias_or_name) 3400 3401 return this 3402 3403 def _parse_query_modifiers( 3404 self, this: t.Optional[exp.Expression] 3405 ) -> t.Optional[exp.Expression]: 3406 if isinstance(this, self.MODIFIABLES): 3407 for join in self._parse_joins(): 3408 this.append("joins", join) 3409 for lateral in iter(self._parse_lateral, None): 3410 this.append("laterals", lateral) 3411 3412 while True: 3413 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3414 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3415 key, expression = parser(self) 3416 3417 if expression: 3418 this.set(key, expression) 3419 if key == "limit": 3420 offset = expression.args.pop("offset", None) 3421 3422 if offset: 3423 offset = exp.Offset(expression=offset) 3424 this.set("offset", offset) 3425 3426 limit_by_expressions = expression.expressions 3427 expression.set("expressions", None) 3428 offset.set("expressions", limit_by_expressions) 3429 continue 3430 break 3431 3432 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3433 this = self._implicit_unnests_to_explicit(this) 3434 3435 return this 3436 3437 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3438 start = self._curr 3439 while self._curr: 3440 self._advance() 3441 3442 end = self._tokens[self._index 
- 1] 3443 return exp.Hint(expressions=[self._find_sql(start, end)]) 3444 3445 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3446 return self._parse_function_call() 3447 3448 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3449 start_index = self._index 3450 should_fallback_to_string = False 3451 3452 hints = [] 3453 try: 3454 for hint in iter( 3455 lambda: self._parse_csv( 3456 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3457 ), 3458 [], 3459 ): 3460 hints.extend(hint) 3461 except ParseError: 3462 should_fallback_to_string = True 3463 3464 if should_fallback_to_string or self._curr: 3465 self._retreat(start_index) 3466 return self._parse_hint_fallback_to_string() 3467 3468 return self.expression(exp.Hint, expressions=hints) 3469 3470 def _parse_hint(self) -> t.Optional[exp.Hint]: 3471 if self._match(TokenType.HINT) and self._prev_comments: 3472 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3473 3474 return None 3475 3476 def _parse_into(self) -> t.Optional[exp.Into]: 3477 if not self._match(TokenType.INTO): 3478 return None 3479 3480 temp = self._match(TokenType.TEMPORARY) 3481 unlogged = self._match_text_seq("UNLOGGED") 3482 self._match(TokenType.TABLE) 3483 3484 return self.expression( 3485 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3486 ) 3487 3488 def _parse_from( 3489 self, joins: bool = False, skip_from_token: bool = False 3490 ) -> t.Optional[exp.From]: 3491 if not skip_from_token and not self._match(TokenType.FROM): 3492 return None 3493 3494 return self.expression( 3495 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3496 ) 3497 3498 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3499 return self.expression( 3500 exp.MatchRecognizeMeasure, 3501 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3502 this=self._parse_expression(), 3503 ) 3504 3505 
def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3506 if not self._match(TokenType.MATCH_RECOGNIZE): 3507 return None 3508 3509 self._match_l_paren() 3510 3511 partition = self._parse_partition_by() 3512 order = self._parse_order() 3513 3514 measures = ( 3515 self._parse_csv(self._parse_match_recognize_measure) 3516 if self._match_text_seq("MEASURES") 3517 else None 3518 ) 3519 3520 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3521 rows = exp.var("ONE ROW PER MATCH") 3522 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3523 text = "ALL ROWS PER MATCH" 3524 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3525 text += " SHOW EMPTY MATCHES" 3526 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3527 text += " OMIT EMPTY MATCHES" 3528 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3529 text += " WITH UNMATCHED ROWS" 3530 rows = exp.var(text) 3531 else: 3532 rows = None 3533 3534 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3535 text = "AFTER MATCH SKIP" 3536 if self._match_text_seq("PAST", "LAST", "ROW"): 3537 text += " PAST LAST ROW" 3538 elif self._match_text_seq("TO", "NEXT", "ROW"): 3539 text += " TO NEXT ROW" 3540 elif self._match_text_seq("TO", "FIRST"): 3541 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3542 elif self._match_text_seq("TO", "LAST"): 3543 text += f" TO LAST {self._advance_any().text}" # type: ignore 3544 after = exp.var(text) 3545 else: 3546 after = None 3547 3548 if self._match_text_seq("PATTERN"): 3549 self._match_l_paren() 3550 3551 if not self._curr: 3552 self.raise_error("Expecting )", self._curr) 3553 3554 paren = 1 3555 start = self._curr 3556 3557 while self._curr and paren > 0: 3558 if self._curr.token_type == TokenType.L_PAREN: 3559 paren += 1 3560 if self._curr.token_type == TokenType.R_PAREN: 3561 paren -= 1 3562 3563 end = self._prev 3564 self._advance() 3565 3566 if paren > 0: 3567 self.raise_error("Expecting )", self._curr) 3568 3569 pattern = 
exp.var(self._find_sql(start, end)) 3570 else: 3571 pattern = None 3572 3573 define = ( 3574 self._parse_csv(self._parse_name_as_expression) 3575 if self._match_text_seq("DEFINE") 3576 else None 3577 ) 3578 3579 self._match_r_paren() 3580 3581 return self.expression( 3582 exp.MatchRecognize, 3583 partition_by=partition, 3584 order=order, 3585 measures=measures, 3586 rows=rows, 3587 after=after, 3588 pattern=pattern, 3589 define=define, 3590 alias=self._parse_table_alias(), 3591 ) 3592 3593 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3594 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3595 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3596 cross_apply = False 3597 3598 if cross_apply is not None: 3599 this = self._parse_select(table=True) 3600 view = None 3601 outer = None 3602 elif self._match(TokenType.LATERAL): 3603 this = self._parse_select(table=True) 3604 view = self._match(TokenType.VIEW) 3605 outer = self._match(TokenType.OUTER) 3606 else: 3607 return None 3608 3609 if not this: 3610 this = ( 3611 self._parse_unnest() 3612 or self._parse_function() 3613 or self._parse_id_var(any_token=False) 3614 ) 3615 3616 while self._match(TokenType.DOT): 3617 this = exp.Dot( 3618 this=this, 3619 expression=self._parse_function() or self._parse_id_var(any_token=False), 3620 ) 3621 3622 ordinality: t.Optional[bool] = None 3623 3624 if view: 3625 table = self._parse_id_var(any_token=False) 3626 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3627 table_alias: t.Optional[exp.TableAlias] = self.expression( 3628 exp.TableAlias, this=table, columns=columns 3629 ) 3630 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3631 # We move the alias from the lateral's child node to the lateral itself 3632 table_alias = this.args["alias"].pop() 3633 else: 3634 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3635 table_alias = self._parse_table_alias() 3636 
3637 return self.expression( 3638 exp.Lateral, 3639 this=this, 3640 view=view, 3641 outer=outer, 3642 alias=table_alias, 3643 cross_apply=cross_apply, 3644 ordinality=ordinality, 3645 ) 3646 3647 def _parse_join_parts( 3648 self, 3649 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3650 return ( 3651 self._match_set(self.JOIN_METHODS) and self._prev, 3652 self._match_set(self.JOIN_SIDES) and self._prev, 3653 self._match_set(self.JOIN_KINDS) and self._prev, 3654 ) 3655 3656 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3657 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3658 this = self._parse_column() 3659 if isinstance(this, exp.Column): 3660 return this.this 3661 return this 3662 3663 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3664 3665 def _parse_join( 3666 self, skip_join_token: bool = False, parse_bracket: bool = False 3667 ) -> t.Optional[exp.Join]: 3668 if self._match(TokenType.COMMA): 3669 table = self._try_parse(self._parse_table) 3670 if table: 3671 return self.expression(exp.Join, this=table) 3672 return None 3673 3674 index = self._index 3675 method, side, kind = self._parse_join_parts() 3676 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3677 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3678 3679 if not skip_join_token and not join: 3680 self._retreat(index) 3681 kind = None 3682 method = None 3683 side = None 3684 3685 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3686 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3687 3688 if not skip_join_token and not join and not outer_apply and not cross_apply: 3689 return None 3690 3691 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3692 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3693 kwargs["expressions"] = self._parse_csv( 
3694 lambda: self._parse_table(parse_bracket=parse_bracket) 3695 ) 3696 3697 if method: 3698 kwargs["method"] = method.text 3699 if side: 3700 kwargs["side"] = side.text 3701 if kind: 3702 kwargs["kind"] = kind.text 3703 if hint: 3704 kwargs["hint"] = hint 3705 3706 if self._match(TokenType.MATCH_CONDITION): 3707 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3708 3709 if self._match(TokenType.ON): 3710 kwargs["on"] = self._parse_assignment() 3711 elif self._match(TokenType.USING): 3712 kwargs["using"] = self._parse_using_identifiers() 3713 elif ( 3714 not (outer_apply or cross_apply) 3715 and not isinstance(kwargs["this"], exp.Unnest) 3716 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3717 ): 3718 index = self._index 3719 joins: t.Optional[list] = list(self._parse_joins()) 3720 3721 if joins and self._match(TokenType.ON): 3722 kwargs["on"] = self._parse_assignment() 3723 elif joins and self._match(TokenType.USING): 3724 kwargs["using"] = self._parse_using_identifiers() 3725 else: 3726 joins = None 3727 self._retreat(index) 3728 3729 kwargs["this"].set("joins", joins if joins else None) 3730 3731 comments = [c for token in (method, side, kind) if token for c in token.comments] 3732 return self.expression(exp.Join, comments=comments, **kwargs) 3733 3734 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3735 this = self._parse_assignment() 3736 3737 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3738 return this 3739 3740 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3741 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3742 3743 return this 3744 3745 def _parse_index_params(self) -> exp.IndexParameters: 3746 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3747 3748 if self._match(TokenType.L_PAREN, advance=False): 3749 columns = self._parse_wrapped_csv(self._parse_with_operator) 3750 else: 3751 
columns = None 3752 3753 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3754 partition_by = self._parse_partition_by() 3755 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3756 tablespace = ( 3757 self._parse_var(any_token=True) 3758 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3759 else None 3760 ) 3761 where = self._parse_where() 3762 3763 on = self._parse_field() if self._match(TokenType.ON) else None 3764 3765 return self.expression( 3766 exp.IndexParameters, 3767 using=using, 3768 columns=columns, 3769 include=include, 3770 partition_by=partition_by, 3771 where=where, 3772 with_storage=with_storage, 3773 tablespace=tablespace, 3774 on=on, 3775 ) 3776 3777 def _parse_index( 3778 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3779 ) -> t.Optional[exp.Index]: 3780 if index or anonymous: 3781 unique = None 3782 primary = None 3783 amp = None 3784 3785 self._match(TokenType.ON) 3786 self._match(TokenType.TABLE) # hive 3787 table = self._parse_table_parts(schema=True) 3788 else: 3789 unique = self._match(TokenType.UNIQUE) 3790 primary = self._match_text_seq("PRIMARY") 3791 amp = self._match_text_seq("AMP") 3792 3793 if not self._match(TokenType.INDEX): 3794 return None 3795 3796 index = self._parse_id_var() 3797 table = None 3798 3799 params = self._parse_index_params() 3800 3801 return self.expression( 3802 exp.Index, 3803 this=index, 3804 table=table, 3805 unique=unique, 3806 primary=primary, 3807 amp=amp, 3808 params=params, 3809 ) 3810 3811 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3812 hints: t.List[exp.Expression] = [] 3813 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3814 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3815 hints.append( 3816 self.expression( 3817 exp.WithTableHint, 3818 expressions=self._parse_csv( 3819 lambda: self._parse_function() or 
self._parse_var(any_token=True) 3820 ), 3821 ) 3822 ) 3823 self._match_r_paren() 3824 else: 3825 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3826 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3827 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3828 3829 self._match_set((TokenType.INDEX, TokenType.KEY)) 3830 if self._match(TokenType.FOR): 3831 hint.set("target", self._advance_any() and self._prev.text.upper()) 3832 3833 hint.set("expressions", self._parse_wrapped_id_vars()) 3834 hints.append(hint) 3835 3836 return hints or None 3837 3838 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3839 return ( 3840 (not schema and self._parse_function(optional_parens=False)) 3841 or self._parse_id_var(any_token=False) 3842 or self._parse_string_as_identifier() 3843 or self._parse_placeholder() 3844 ) 3845 3846 def _parse_table_parts( 3847 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3848 ) -> exp.Table: 3849 catalog = None 3850 db = None 3851 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3852 3853 while self._match(TokenType.DOT): 3854 if catalog: 3855 # This allows nesting the table in arbitrarily many dot expressions if needed 3856 table = self.expression( 3857 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3858 ) 3859 else: 3860 catalog = db 3861 db = table 3862 # "" used for tsql FROM a..b case 3863 table = self._parse_table_part(schema=schema) or "" 3864 3865 if ( 3866 wildcard 3867 and self._is_connected() 3868 and (isinstance(table, exp.Identifier) or not table) 3869 and self._match(TokenType.STAR) 3870 ): 3871 if isinstance(table, exp.Identifier): 3872 table.args["this"] += "*" 3873 else: 3874 table = exp.Identifier(this="*") 3875 3876 # We bubble up comments from the Identifier to the Table 3877 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3878 3879 if is_db_reference: 3880 
def _parse_table(
    self,
    schema: bool = False,
    joins: bool = False,
    alias_tokens: t.Optional[t.Collection[TokenType]] = None,
    parse_bracket: bool = False,
    is_db_reference: bool = False,
    parse_partition: bool = False,
) -> t.Optional[exp.Expression]:
    """Parse a table-like source together with its trailing modifiers.

    LATERAL, UNNEST, derived VALUES and subqueries are tried first and returned as soon
    as one of them parses. Otherwise a (bracketed / ROWS FROM / dotted) table reference
    is parsed and then decorated, in order, with: ONLY, partition, version, sample,
    alias, hints, pivots, joins and WITH ORDINALITY.

    Args:
        schema: Parse the table parts in schema mode and hand the result to `_parse_schema`.
        joins: Also consume any joins that follow and append them to the table.
        alias_tokens: Tokens accepted as a table alias; defaults to `TABLE_ALIAS_TOKENS`.
        parse_bracket: Try to parse a leading bracket expression as the table.
        is_db_reference: Forwarded to `_parse_table_parts`.
        parse_partition: Allow a PARTITION clause even if the parser does not enable
            it through `SUPPORTS_PARTITION_SELECTION`.

    Returns:
        The parsed expression for the table source.
    """
    lateral = self._parse_lateral()
    if lateral:
        return lateral

    unnest = self._parse_unnest()
    if unnest:
        return unnest

    values = self._parse_derived_table_values()
    if values:
        return values

    subquery = self._parse_select(table=True)
    if subquery:
        # Only look for pivots if the subquery parser did not already attach some
        if not subquery.args.get("pivots"):
            subquery.set("pivots", self._parse_pivots())
        return subquery

    bracket = parse_bracket and self._parse_bracket(None)
    bracket = self.expression(exp.Table, this=bracket) if bracket else None

    rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv(
        self._parse_table
    )
    rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None

    only = self._match(TokenType.ONLY)

    # Fall back to a regular dotted table reference when neither special form matched
    this = t.cast(
        exp.Expression,
        bracket
        or rows_from
        or self._parse_bracket(
            self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
        ),
    )

    if only:
        this.set("only", only)

    # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
    self._match_text_seq("*")

    parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
    if parse_partition and self._match(TokenType.PARTITION, advance=False):
        this.set("partition", self._parse_partition())

    # In schema mode nothing past this point is parsed here; `_parse_schema` takes over
    if schema:
        return self._parse_schema(this=this)

    version = self._parse_version()

    if version:
        this.set("version", version)

    # The dialect flag decides whether the sample clause is read before or after the alias
    if self.dialect.ALIAS_POST_TABLESAMPLE:
        this.set("sample", self._parse_table_sample())

    alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
    if alias:
        this.set("alias", alias)

    # `<table> AT <id>` turns the table reference into a column wrapped in exp.AtIndex
    if isinstance(this, exp.Table) and self._match_text_seq("AT"):
        return self.expression(
            exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
        )

    this.set("hints", self._parse_table_hints())

    if not this.args.get("pivots"):
        this.set("pivots", self._parse_pivots())

    if not self.dialect.ALIAS_POST_TABLESAMPLE:
        this.set("sample", self._parse_table_sample())

    if joins:
        for join in self._parse_joins():
            this.append("joins", join)

    # WITH ORDINALITY may be followed by its own alias, which replaces any alias set above
    if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
        this.set("ordinality", True)
        this.set("alias", self._parse_table_alias())

    return this
def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]:
    """Parse a historical-data clause of the form `<prefix> (<kind> => <expression>)`.

    Returns:
        An `exp.HistoricalData` node, or None when no prefix is present. If a prefix
        matched but no `=> <expression>` follows, the parser is rewound to where it
        started and None is returned.
    """
    # https://docs.snowflake.com/en/sql-reference/constructs/at-before
    start = self._index

    if not self._match_texts(self.HISTORICAL_DATA_PREFIX):
        return None

    prefix = self._prev.text.upper()
    kind = (
        self._match(TokenType.L_PAREN)
        and self._match_texts(self.HISTORICAL_DATA_KIND)
        and self._prev.text.upper()
    )
    expression = self._match(TokenType.FARROW) and self._parse_bitwise()

    if not expression:
        # Not a historical-data clause after all: undo everything consumed so far
        self._retreat(start)
        return None

    self._match_r_paren()
    return self.expression(exp.HistoricalData, this=prefix, kind=kind, expression=expression)
def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
    """Parse a table sampling clause into an `exp.TableSample` node.

    Args:
        as_modifier: Also accept the `USING SAMPLE` spelling in addition to the
            TABLE_SAMPLE token.

    Returns:
        The `exp.TableSample` node, or None when no sampling clause starts here.
    """
    if not self._match(TokenType.TABLE_SAMPLE) and not (
        as_modifier and self._match_text_seq("USING", "SAMPLE")
    ):
        return None

    bucket_numerator = None
    bucket_denominator = None
    bucket_field = None
    percent = None
    size = None
    seed = None

    # An optional sampling method may precede the (optionally parenthesized) sample size
    method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
    matched_l_paren = self._match(TokenType.L_PAREN)

    if self.TABLESAMPLE_CSV:
        # Parsers with TABLESAMPLE_CSV take a comma-separated list instead of one number
        num = None
        expressions = self._parse_csv(self._parse_primary)
    else:
        expressions = None
        num = (
            self._parse_factor()
            if self._match(TokenType.NUMBER, advance=False)
            else self._parse_primary() or self._parse_placeholder()
        )

    if self._match_text_seq("BUCKET"):
        # BUCKET <numerator> OUT OF <denominator> [ON <field>]
        bucket_numerator = self._parse_number()
        self._match_text_seq("OUT", "OF")
        bucket_denominator = self._parse_number()
        self._match(TokenType.ON)
        bucket_field = self._parse_field()
    elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
        percent = num
    elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
        size = num
    else:
        # A bare number is a percentage when the dialect sets TABLESAMPLE_SIZE_IS_PERCENT
        percent = num

    if matched_l_paren:
        self._match_r_paren()

    if self._match(TokenType.L_PAREN):
        # Trailing `(<method> [, <seed>])` form; overrides any method parsed earlier
        method = self._parse_var(upper=True)
        seed = self._match(TokenType.COMMA) and self._parse_number()
        self._match_r_paren()
    elif self._match_texts(("SEED", "REPEATABLE")):
        seed = self._parse_wrapped(self._parse_number)

    if not method and self.DEFAULT_SAMPLING_METHOD:
        method = exp.var(self.DEFAULT_SAMPLING_METHOD)

    return self.expression(
        exp.TableSample,
        expressions=expressions,
        method=method,
        bucket_numerator=bucket_numerator,
        bucket_denominator=bucket_denominator,
        bucket_field=bucket_field,
        percent=percent,
        size=size,
        seed=seed,
    )
def _parse_pivot(self) -> t.Optional[exp.Pivot]:
    """Parse a parenthesized PIVOT / UNPIVOT clause.

    Expected shape: `PIVOT|UNPIVOT [INCLUDE|EXCLUDE NULLS] (<expressions> FOR <field> IN (...)
    ... [DEFAULT ON NULL (...)] [GROUP BY ...]) [alias]`.

    Returns:
        An `exp.Pivot` node, or None when neither keyword is present or the keyword is
        not followed by `(` (in which case the parser is rewound). For PIVOT, the
        output column identifiers are also inferred and stored under "columns".
    """
    index = self._index
    include_nulls = None

    if self._match(TokenType.PIVOT):
        unpivot = False
    elif self._match(TokenType.UNPIVOT):
        unpivot = True

        # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
        if self._match_text_seq("INCLUDE", "NULLS"):
            include_nulls = True
        elif self._match_text_seq("EXCLUDE", "NULLS"):
            include_nulls = False
    else:
        return None

    # Placeholder only: reassigned in both branches below
    expressions = []

    if not self._match(TokenType.L_PAREN):
        # Keyword without a parenthesized body: undo the keyword match and give up
        self._retreat(index)
        return None

    if unpivot:
        expressions = self._parse_csv(self._parse_column)
    else:
        expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

    if not expressions:
        self.raise_error("Failed to parse PIVOT's aggregation list")

    if not self._match(TokenType.FOR):
        self.raise_error("Expecting FOR")

    # Collect `<field> IN (...)` parts until one fails to parse
    fields = []
    while True:
        field = self._try_parse(self._parse_pivot_in)
        if not field:
            break
        fields.append(field)

    default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
        self._parse_bitwise
    )

    group = self._parse_group()

    self._match_r_paren()

    pivot = self.expression(
        exp.Pivot,
        expressions=expressions,
        fields=fields,
        unpivot=unpivot,
        include_nulls=include_nulls,
        default_on_null=default_on_null,
        group=group,
    )

    # Skip alias parsing when another PIVOT/UNPIVOT keyword follows directly
    if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
        pivot.set("alias", self._parse_table_alias())

    if not unpivot:
        names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

        columns: t.List[exp.Expression] = []
        all_fields = []
        for pivot_field in pivot.fields:
            pivot_field_expressions = pivot_field.expressions

            # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case.
            if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny):
                continue

            all_fields.append(
                [
                    fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                    for fld in pivot_field_expressions
                ]
            )

        if all_fields:
            if names:
                all_fields.append(names)

            # Generate all possible combinations of the pivot columns
            # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US'))
            # generates the product between [[2000, 2010], ['NL', 'US'], ['total']]
            for fld_parts_tuple in itertools.product(*all_fields):
                fld_parts = list(fld_parts_tuple)

                if names and self.PREFIXED_PIVOT_COLUMNS:
                    # Move the "name" to the front of the list
                    fld_parts.insert(0, fld_parts.pop(-1))

                columns.append(exp.to_identifier("_".join(fld_parts)))

        # Set even when empty, i.e. when no column names could be inferred
        pivot.set("columns", columns)

    return pivot
def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
    """Parse one grouping set.

    Returns:
        An `exp.Tuple` of columns for the parenthesized form `(<col>, ...)`, otherwise
        the result of parsing a single column.
    """
    if not self._match(TokenType.L_PAREN):
        return self._parse_column()

    members = self._parse_csv(self._parse_column)
    self._match_r_paren()
    return self.expression(exp.Tuple, expressions=members)
def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
    """Parse a hierarchical query clause: `[START WITH <cond>] CONNECT BY [NOCYCLE] <cond>`.

    START WITH is accepted either before or after the CONNECT BY condition.

    Args:
        skip_start_token: Do not look for a leading START WITH; begin directly at
            CONNECT BY.

    Returns:
        An `exp.Connect` node, or None when `skip_start_token` is False and the clause
        does not begin with START WITH.
    """
    if skip_start_token:
        start = None
    elif self._match(TokenType.START_WITH):
        start = self._parse_assignment()
    else:
        return None

    self._match(TokenType.CONNECT_BY)
    nocycle = self._match_text_seq("NOCYCLE")

    # PRIOR is parsed as an operator only inside the CONNECT BY condition, so its
    # parser is registered temporarily. The mapping is mutated in place, so the previous
    # state must be restored even if parsing raises; otherwise the entry would outlive
    # this call. Restoring the saved entry, rather than always popping, keeps a nested
    # CONNECT BY from removing the entry an enclosing call still relies on.
    no_paren_parsers = self.NO_PAREN_FUNCTION_PARSERS
    missing = object()
    previous_prior = no_paren_parsers.get("PRIOR", missing)
    no_paren_parsers["PRIOR"] = lambda self: self.expression(
        exp.Prior, this=self._parse_bitwise()
    )
    try:
        connect = self._parse_assignment()
    finally:
        if previous_prior is missing:
            no_paren_parsers.pop("PRIOR", None)
        else:
            no_paren_parsers["PRIOR"] = previous_prior

    if not start and self._match(TokenType.START_WITH):
        start = self._parse_assignment()

    return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)
def _parse_ordered(
    self, parse_method: t.Optional[t.Callable] = None
) -> t.Optional[exp.Ordered]:
    """Parse a single ORDER BY term: `<expr> [ASC | DESC] [NULLS FIRST | NULLS LAST] [WITH FILL ...]`.

    Args:
        parse_method: Optional callable used to parse the ordered expression instead of
            `_parse_assignment`.

    Returns:
        An `exp.Ordered` node, or None when no expression could be parsed.
    """
    this = parse_method() if parse_method else self._parse_assignment()
    if not this:
        return None

    # In dialects that support ORDER BY ALL, "ALL" is kept as a Var instead of the parsed expression
    if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL:
        this = exp.var("ALL")

    asc = self._match(TokenType.ASC)
    # An explicit ASC yields `False`; with neither keyword present, `desc` keeps the
    # falsy result of the ASC match instead
    desc = self._match(TokenType.DESC) or (asc and False)

    is_nulls_first = self._match_text_seq("NULLS", "FIRST")
    is_nulls_last = self._match_text_seq("NULLS", "LAST")

    nulls_first = is_nulls_first or False
    explicitly_null_ordered = is_nulls_first or is_nulls_last

    # Without an explicit NULLS FIRST/LAST, derive the placement from the dialect's
    # NULL_ORDERING: "nulls_are_small" puts nulls first for ascending order, any other
    # setting except "nulls_are_last" puts them first for descending order
    if (
        not explicitly_null_ordered
        and (
            (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
            or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
        )
        and self.dialect.NULL_ORDERING != "nulls_are_last"
    ):
        nulls_first = True

    if self._match_text_seq("WITH", "FILL"):
        # Passed as a dict because "from" is a Python keyword
        with_fill = self.expression(
            exp.WithFill,
            **{  # type: ignore
                "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                "to": self._match_text_seq("TO") and self._parse_bitwise(),
                "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                "interpolate": self._parse_interpolate(),
            },
        )
    else:
        with_fill = None

    return self.expression(
        exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
    )
def _parse_limit(
    self,
    this: t.Optional[exp.Expression] = None,
    top: bool = False,
    skip_limit_token: bool = False,
) -> t.Optional[exp.Expression]:
    """Parse a LIMIT / TOP clause, or a FETCH clause as a fallback.

    Args:
        this: The expression the limit applies to; returned unchanged when no clause
            is found.
        top: Look for the TOP token (with its optional parentheses and limit options)
            instead of LIMIT.
        skip_limit_token: Assume the LIMIT/TOP token was already consumed.

    Returns:
        An `exp.Limit` or `exp.Fetch` node, or `this` when neither clause is present.
    """
    limit_token = TokenType.TOP if top else TokenType.LIMIT

    if not skip_limit_token and not self._match(limit_token):
        if not self._match(TokenType.FETCH):
            return this

        if self._match_set((TokenType.FIRST, TokenType.NEXT)):
            direction = self._prev.text.upper()
        else:
            direction = "FIRST"

        count = self._parse_field(tokens=self.FETCH_TOKENS)

        return self.expression(
            exp.Fetch,
            direction=direction,
            count=count,
            limit_options=self._parse_limit_options(),
        )

    comments = self._prev_comments
    limit_options = None
    offset = None

    if top:
        has_paren = self._match(TokenType.L_PAREN)
        if has_paren:
            expression = self._parse_term()
            self._match_r_paren()
        else:
            expression = self._parse_number()

        limit_options = self._parse_limit_options()
    else:
        expression = self._parse_term()

    # `LIMIT <offset>, <count>`: the first term was actually the offset
    if self._match(TokenType.COMMA):
        offset, expression = expression, self._parse_term()

    return self.expression(
        exp.Limit,
        this=this,
        expression=expression,
        offset=offset,
        comments=comments,
        limit_options=limit_options,
        expressions=self._parse_limit_by(),
    )
def _parse_locks(self) -> t.List[exp.Lock]:
    """Parse any number of consecutive row-locking clauses.

    Recognized forms are `FOR UPDATE`, `FOR SHARE` and `LOCK IN SHARE MODE`, each
    optionally followed by `OF <tables>` and one of `NOWAIT`, `WAIT <value>` or
    `SKIP LOCKED`.

    Returns:
        One `exp.Lock` per clause, in source order (empty if there are none).
    """
    parsed_locks = []

    while True:
        if self._match_text_seq("FOR", "UPDATE"):
            is_update = True
        elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
            "LOCK", "IN", "SHARE", "MODE"
        ):
            is_update = False
        else:
            return parsed_locks

        targets = (
            self._parse_csv(lambda: self._parse_table(schema=True))
            if self._match_text_seq("OF")
            else None
        )

        # True for NOWAIT, False for SKIP LOCKED, an expression for WAIT <value>
        wait_mode: t.Optional[bool | exp.Expression] = None
        if self._match_text_seq("NOWAIT"):
            wait_mode = True
        elif self._match_text_seq("WAIT"):
            wait_mode = self._parse_primary()
        elif self._match_text_seq("SKIP", "LOCKED"):
            wait_mode = False

        lock = self.expression(exp.Lock, update=is_update, expressions=targets, wait=wait_mode)
        parsed_locks.append(lock)
def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    """Fold every set operation that follows `this` into a left-nested expression.

    When the parser sets `MODIFIERS_ATTACHED_TO_SET_OP`, the modifiers listed in
    `SET_OP_MODIFIERS` are moved from the right-hand operand of the outermost set
    operation onto the set operation itself.

    Args:
        this: The left-hand expression parsed so far.

    Returns:
        The resulting set operation, or `this` unchanged if none follows.
    """
    setop = self.parse_set_operation(this)
    while setop:
        this = setop
        setop = self.parse_set_operation(this)

    if not (isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP):
        return this

    right_operand = this.expression
    if right_operand:
        for modifier in self.SET_OP_MODIFIERS:
            value = right_operand.args.get(modifier)
            if value:
                # `pop` detaches the modifier from the operand before it is re-attached
                this.set(modifier, value.pop())

    return this
def _parse_disjunction(self) -> t.Optional[exp.Expression]:
    """Parse a disjunction.

    Delegates to `_parse_tokens`, using `_parse_conjunction` as the operand parser and
    `DISJUNCTION` as the operator token table.
    """
    operand_parser = self._parse_conjunction
    operator_tokens = self.DISJUNCTION
    return self._parse_tokens(operand_parser, operator_tokens)
def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    """Parse what follows an IS keyword.

    Handles `IS [NOT] DISTINCT FROM <expr>`, `IS [NOT] JSON [<kind>] [WITH | WITHOUT]
    [UNIQUE] [KEYS]` and `IS [NOT] <primary | NULL>`.

    Args:
        this: The left-hand operand of IS.

    Returns:
        The resulting expression, or None (after rewinding) when no right-hand operand
        could be parsed.
    """
    # One token back from the current position: presumably the IS token the caller consumed
    rewind_to = self._index - 1
    negated = self._match(TokenType.NOT)

    if self._match_text_seq("DISTINCT", "FROM"):
        # IS NOT DISTINCT FROM is null-safe equality; IS DISTINCT FROM is its negation
        null_safe_op = exp.NullSafeEQ if negated else exp.NullSafeNEQ
        return self.expression(null_safe_op, this=this, expression=self._parse_bitwise())

    operand: t.Optional[exp.Expression]
    if not self._match(TokenType.JSON):
        operand = self._parse_primary() or self._parse_null()
        if not operand:
            self._retreat(rewind_to)
            return None
    else:
        kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper()

        with_keys = None
        if self._match_text_seq("WITH"):
            with_keys = True
        elif self._match_text_seq("WITHOUT"):
            with_keys = False

        unique = self._match(TokenType.UNIQUE)
        self._match_text_seq("KEYS")

        # Passed as a dict because "with" is a Python keyword
        json_args = {"this": kind, "with": with_keys, "unique": unique}
        operand = self.expression(exp.JSON, **json_args)

    is_expr = self.expression(exp.Is, this=this, expression=operand)
    if negated:
        return self.expression(exp.Not, this=is_expr)
    return is_expr
def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
    """Parse an INTERVAL expression, normalizing it towards the `INTERVAL '<value>' <unit>` form.

    Args:
        match_interval: Require the INTERVAL keyword. When False, the keyword is still
            consumed if present, but its absence is not an error (used for the
            continuation parts of a multi-part interval).

    Returns:
        An `exp.Interval`, an `exp.Add` chaining several intervals, or None (with the
        parser rewound) when no interval value could be parsed.
    """
    index = self._index

    if not self._match(TokenType.INTERVAL) and match_interval:
        return None

    if self._match(TokenType.STRING, advance=False):
        this = self._parse_primary()
    else:
        this = self._parse_term()

    # Give up when nothing was parsed, or when the "value" is a bare unquoted column
    # named IS (so `interval IS ...` is not treated as an INTERVAL expression)
    if not this or (
        isinstance(this, exp.Column)
        and not this.table
        and not this.this.quoted
        and this.name.upper() == "IS"
    ):
        self._retreat(index)
        return None

    # The unit is either a function call or any token read as a variable, unless an
    # alias follows
    unit = self._parse_function() or (
        not self._match(TokenType.ALIAS, advance=False)
        and self._parse_var(any_token=True, upper=True)
    )

    # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
    # each INTERVAL expression into this canonical form so it's easy to transpile
    if this and this.is_number:
        this = exp.Literal.string(this.to_py())
    elif this and this.is_string:
        parts = exp.INTERVAL_STRING_RE.findall(this.name)
        if parts and unit:
            # Unconsume the eagerly-parsed unit, since the real unit was part of the string
            unit = None
            self._retreat(self._index - 1)

        # A string holding exactly one value/unit pair is split into its two parts
        if len(parts) == 1:
            this = exp.Literal.string(parts[0][0])
            unit = self.expression(exp.Var, this=parts[0][1].upper())
    if self.INTERVAL_SPANS and self._match_text_seq("TO"):
        # `<unit> TO <unit>` span, only for parsers that enable INTERVAL_SPANS
        unit = self.expression(
            exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
        )

    interval = self.expression(exp.Interval, this=this, unit=unit)

    # Remember the position so an optional `+` can be given back if no further part follows
    index = self._index
    self._match(TokenType.PLUS)

    # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
    if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
        return self.expression(
            exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
        )

    self._retreat(index)
    return interval
this=this, expression=self._parse_term() 4961 ) 4962 elif self._match_pair(TokenType.GT, TokenType.GT): 4963 this = self.expression( 4964 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4965 ) 4966 else: 4967 break 4968 4969 return this 4970 4971 def _parse_term(self) -> t.Optional[exp.Expression]: 4972 this = self._parse_factor() 4973 4974 while self._match_set(self.TERM): 4975 klass = self.TERM[self._prev.token_type] 4976 comments = self._prev_comments 4977 expression = self._parse_factor() 4978 4979 this = self.expression(klass, this=this, comments=comments, expression=expression) 4980 4981 if isinstance(this, exp.Collate): 4982 expr = this.expression 4983 4984 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 4985 # fallback to Identifier / Var 4986 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4987 ident = expr.this 4988 if isinstance(ident, exp.Identifier): 4989 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4990 4991 return this 4992 4993 def _parse_factor(self) -> t.Optional[exp.Expression]: 4994 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4995 this = parse_method() 4996 4997 while self._match_set(self.FACTOR): 4998 klass = self.FACTOR[self._prev.token_type] 4999 comments = self._prev_comments 5000 expression = parse_method() 5001 5002 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 5003 self._retreat(self._index - 1) 5004 return this 5005 5006 this = self.expression(klass, this=this, comments=comments, expression=expression) 5007 5008 if isinstance(this, exp.Div): 5009 this.args["typed"] = self.dialect.TYPED_DIVISION 5010 this.args["safe"] = self.dialect.SAFE_DIVISION 5011 5012 return this 5013 5014 def _parse_exponent(self) -> t.Optional[exp.Expression]: 5015 return self._parse_tokens(self._parse_unary, self.EXPONENT) 5016 5017 def _parse_unary(self) -> t.Optional[exp.Expression]: 5018 if 
self._match_set(self.UNARY_PARSERS): 5019 return self.UNARY_PARSERS[self._prev.token_type](self) 5020 return self._parse_at_time_zone(self._parse_type()) 5021 5022 def _parse_type( 5023 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5024 ) -> t.Optional[exp.Expression]: 5025 interval = parse_interval and self._parse_interval() 5026 if interval: 5027 return interval 5028 5029 index = self._index 5030 data_type = self._parse_types(check_func=True, allow_identifiers=False) 5031 5032 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 5033 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 5034 if isinstance(data_type, exp.Cast): 5035 # This constructor can contain ops directly after it, for instance struct unnesting: 5036 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).* 5037 return self._parse_column_ops(data_type) 5038 5039 if data_type: 5040 index2 = self._index 5041 this = self._parse_primary() 5042 5043 if isinstance(this, exp.Literal): 5044 this = self._parse_column_ops(this) 5045 5046 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 5047 if parser: 5048 return parser(self, this, data_type) 5049 5050 return self.expression(exp.Cast, this=this, to=data_type) 5051 5052 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 5053 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 5054 # 5055 # If the index difference here is greater than 1, that means the parser itself must have 5056 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 5057 # 5058 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 5059 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 5060 # callable in the TYPE_CONVERTERS mapping. 
For example, Snowflake converts DECIMAL to 5061 # DECIMAL(38, 0)) in order to facilitate the data type's transpilation. 5062 # 5063 # In these cases, we don't really want to return the converted type, but instead retreat 5064 # and try to parse a Column or Identifier in the section below. 5065 if data_type.expressions and index2 - index > 1: 5066 self._retreat(index2) 5067 return self._parse_column_ops(data_type) 5068 5069 self._retreat(index) 5070 5071 if fallback_to_identifier: 5072 return self._parse_id_var() 5073 5074 this = self._parse_column() 5075 return this and self._parse_column_ops(this) 5076 5077 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 5078 this = self._parse_type() 5079 if not this: 5080 return None 5081 5082 if isinstance(this, exp.Column) and not this.table: 5083 this = exp.var(this.name.upper()) 5084 5085 return self.expression( 5086 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 5087 ) 5088 5089 def _parse_types( 5090 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 5091 ) -> t.Optional[exp.Expression]: 5092 index = self._index 5093 5094 this: t.Optional[exp.Expression] = None 5095 prefix = self._match_text_seq("SYSUDTLIB", ".") 5096 5097 if not self._match_set(self.TYPE_TOKENS): 5098 identifier = allow_identifiers and self._parse_id_var( 5099 any_token=False, tokens=(TokenType.VAR,) 5100 ) 5101 if isinstance(identifier, exp.Identifier): 5102 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 5103 5104 if len(tokens) != 1: 5105 self.raise_error("Unexpected identifier", self._prev) 5106 5107 if tokens[0].token_type in self.TYPE_TOKENS: 5108 self._prev = tokens[0] 5109 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 5110 type_name = identifier.name 5111 5112 while self._match(TokenType.DOT): 5113 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5114 5115 this = exp.DataType.build(type_name, udt=True) 5116 else: 5117 
self._retreat(self._index - 1) 5118 return None 5119 else: 5120 return None 5121 5122 type_token = self._prev.token_type 5123 5124 if type_token == TokenType.PSEUDO_TYPE: 5125 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5126 5127 if type_token == TokenType.OBJECT_IDENTIFIER: 5128 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5129 5130 # https://materialize.com/docs/sql/types/map/ 5131 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5132 key_type = self._parse_types( 5133 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5134 ) 5135 if not self._match(TokenType.FARROW): 5136 self._retreat(index) 5137 return None 5138 5139 value_type = self._parse_types( 5140 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5141 ) 5142 if not self._match(TokenType.R_BRACKET): 5143 self._retreat(index) 5144 return None 5145 5146 return exp.DataType( 5147 this=exp.DataType.Type.MAP, 5148 expressions=[key_type, value_type], 5149 nested=True, 5150 prefix=prefix, 5151 ) 5152 5153 nested = type_token in self.NESTED_TYPE_TOKENS 5154 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5155 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5156 expressions = None 5157 maybe_func = False 5158 5159 if self._match(TokenType.L_PAREN): 5160 if is_struct: 5161 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5162 elif nested: 5163 expressions = self._parse_csv( 5164 lambda: self._parse_types( 5165 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5166 ) 5167 ) 5168 if type_token == TokenType.NULLABLE and len(expressions) == 1: 5169 this = expressions[0] 5170 this.set("nullable", True) 5171 self._match_r_paren() 5172 return this 5173 elif type_token in self.ENUM_TYPE_TOKENS: 5174 expressions = self._parse_csv(self._parse_equality) 5175 elif is_aggregate: 5176 func_or_ident = self._parse_function(anonymous=True) or 
self._parse_id_var( 5177 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5178 ) 5179 if not func_or_ident: 5180 return None 5181 expressions = [func_or_ident] 5182 if self._match(TokenType.COMMA): 5183 expressions.extend( 5184 self._parse_csv( 5185 lambda: self._parse_types( 5186 check_func=check_func, 5187 schema=schema, 5188 allow_identifiers=allow_identifiers, 5189 ) 5190 ) 5191 ) 5192 else: 5193 expressions = self._parse_csv(self._parse_type_size) 5194 5195 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5196 if type_token == TokenType.VECTOR and len(expressions) == 2: 5197 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 5198 5199 if not expressions or not self._match(TokenType.R_PAREN): 5200 self._retreat(index) 5201 return None 5202 5203 maybe_func = True 5204 5205 values: t.Optional[t.List[exp.Expression]] = None 5206 5207 if nested and self._match(TokenType.LT): 5208 if is_struct: 5209 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5210 else: 5211 expressions = self._parse_csv( 5212 lambda: self._parse_types( 5213 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5214 ) 5215 ) 5216 5217 if not self._match(TokenType.GT): 5218 self.raise_error("Expecting >") 5219 5220 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5221 values = self._parse_csv(self._parse_assignment) 5222 if not values and is_struct: 5223 values = None 5224 self._retreat(self._index - 1) 5225 else: 5226 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5227 5228 if type_token in self.TIMESTAMPS: 5229 if self._match_text_seq("WITH", "TIME", "ZONE"): 5230 maybe_func = False 5231 tz_type = ( 5232 exp.DataType.Type.TIMETZ 5233 if type_token in self.TIMES 5234 else exp.DataType.Type.TIMESTAMPTZ 5235 ) 5236 this = exp.DataType(this=tz_type, expressions=expressions) 5237 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5238 maybe_func = False 5239 this 
= exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5240 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5241 maybe_func = False 5242 elif type_token == TokenType.INTERVAL: 5243 unit = self._parse_var(upper=True) 5244 if unit: 5245 if self._match_text_seq("TO"): 5246 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5247 5248 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5249 else: 5250 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5251 elif type_token == TokenType.VOID: 5252 this = exp.DataType(this=exp.DataType.Type.NULL) 5253 5254 if maybe_func and check_func: 5255 index2 = self._index 5256 peek = self._parse_string() 5257 5258 if not peek: 5259 self._retreat(index) 5260 return None 5261 5262 self._retreat(index2) 5263 5264 if not this: 5265 if self._match_text_seq("UNSIGNED"): 5266 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5267 if not unsigned_type_token: 5268 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5269 5270 type_token = unsigned_type_token or type_token 5271 5272 this = exp.DataType( 5273 this=exp.DataType.Type[type_token.value], 5274 expressions=expressions, 5275 nested=nested, 5276 prefix=prefix, 5277 ) 5278 5279 # Empty arrays/structs are allowed 5280 if values is not None: 5281 cls = exp.Struct if is_struct else exp.Array 5282 this = exp.cast(cls(expressions=values), this, copy=False) 5283 5284 elif expressions: 5285 this.set("expressions", expressions) 5286 5287 # https://materialize.com/docs/sql/types/list/#type-name 5288 while self._match(TokenType.LIST): 5289 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5290 5291 index = self._index 5292 5293 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5294 matched_array = self._match(TokenType.ARRAY) 5295 5296 while self._curr: 5297 datatype_token = self._prev.token_type 5298 
matched_l_bracket = self._match(TokenType.L_BRACKET) 5299 5300 if (not matched_l_bracket and not matched_array) or ( 5301 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5302 ): 5303 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5304 # not to be confused with the fixed size array parsing 5305 break 5306 5307 matched_array = False 5308 values = self._parse_csv(self._parse_assignment) or None 5309 if ( 5310 values 5311 and not schema 5312 and ( 5313 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5314 ) 5315 ): 5316 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5317 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5318 self._retreat(index) 5319 break 5320 5321 this = exp.DataType( 5322 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5323 ) 5324 self._match(TokenType.R_BRACKET) 5325 5326 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5327 converter = self.TYPE_CONVERTERS.get(this.this) 5328 if converter: 5329 this = converter(t.cast(exp.DataType, this)) 5330 5331 return this 5332 5333 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5334 index = self._index 5335 5336 if ( 5337 self._curr 5338 and self._next 5339 and self._curr.token_type in self.TYPE_TOKENS 5340 and self._next.token_type in self.TYPE_TOKENS 5341 ): 5342 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5343 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5344 this = self._parse_id_var() 5345 else: 5346 this = ( 5347 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5348 or self._parse_id_var() 5349 ) 5350 5351 self._match(TokenType.COLON) 5352 5353 if ( 5354 type_required 5355 and not isinstance(this, exp.DataType) 5356 and not self._match_set(self.TYPE_TOKENS, advance=False) 5357 ): 5358 self._retreat(index) 5359 return self._parse_types() 5360 5361 return self._parse_column_def(this) 5362 5363 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5364 if not self._match_text_seq("AT", "TIME", "ZONE"): 5365 return this 5366 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5367 5368 def _parse_column(self) -> t.Optional[exp.Expression]: 5369 this = self._parse_column_reference() 5370 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5371 5372 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5373 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5374 5375 return column 5376 5377 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5378 this = self._parse_field() 5379 if ( 5380 not this 5381 and self._match(TokenType.VALUES, advance=False) 5382 and self.VALUES_FOLLOWED_BY_PAREN 5383 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5384 ): 5385 this = self._parse_id_var() 5386 5387 if isinstance(this, exp.Identifier): 5388 # We bubble up comments from the Identifier to the Column 5389 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5390 5391 return this 5392 5393 def _parse_colon_as_variant_extract( 5394 self, this: t.Optional[exp.Expression] 5395 ) -> t.Optional[exp.Expression]: 5396 casts = [] 5397 json_path = [] 5398 escape = None 5399 5400 while self._match(TokenType.COLON): 5401 start_index = self._index 5402 5403 # Snowflake allows reserved keywords as 
json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5404 path = self._parse_column_ops( 5405 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5406 ) 5407 5408 # The cast :: operator has a lower precedence than the extraction operator :, so 5409 # we rearrange the AST appropriately to avoid casting the JSON path 5410 while isinstance(path, exp.Cast): 5411 casts.append(path.to) 5412 path = path.this 5413 5414 if casts: 5415 dcolon_offset = next( 5416 i 5417 for i, t in enumerate(self._tokens[start_index:]) 5418 if t.token_type == TokenType.DCOLON 5419 ) 5420 end_token = self._tokens[start_index + dcolon_offset - 1] 5421 else: 5422 end_token = self._prev 5423 5424 if path: 5425 # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as 5426 # it'll roundtrip to a string literal in GET_PATH 5427 if isinstance(path, exp.Identifier) and path.quoted: 5428 escape = True 5429 5430 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5431 5432 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5433 # Databricks transforms it back to the colon/dot notation 5434 if json_path: 5435 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5436 5437 if json_path_expr: 5438 json_path_expr.set("escape", escape) 5439 5440 this = self.expression( 5441 exp.JSONExtract, 5442 this=this, 5443 expression=json_path_expr, 5444 variant_extract=True, 5445 ) 5446 5447 while casts: 5448 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5449 5450 return this 5451 5452 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5453 return self._parse_types() 5454 5455 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5456 this = self._parse_bracket(this) 5457 5458 while self._match_set(self.COLUMN_OPERATORS): 5459 op_token = self._prev.token_type 5460 op = 
self.COLUMN_OPERATORS.get(op_token) 5461 5462 if op_token in (TokenType.DCOLON, TokenType.DOTCOLON): 5463 field = self._parse_dcolon() 5464 if not field: 5465 self.raise_error("Expected type") 5466 elif op and self._curr: 5467 field = self._parse_column_reference() or self._parse_bracket() 5468 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5469 field = self._parse_column_ops(field) 5470 else: 5471 field = self._parse_field(any_token=True, anonymous_func=True) 5472 5473 if isinstance(field, (exp.Func, exp.Window)) and this: 5474 # BQ & snowflake allow function calls like x.y.count(...), SAFE.SUBSTR(...) etc 5475 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5476 this = exp.replace_tree( 5477 this, 5478 lambda n: ( 5479 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5480 if n.table 5481 else n.this 5482 ) 5483 if isinstance(n, exp.Column) 5484 else n, 5485 ) 5486 5487 if op: 5488 this = op(self, this, field) 5489 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5490 this = self.expression( 5491 exp.Column, 5492 comments=this.comments, 5493 this=field, 5494 table=this.this, 5495 db=this.args.get("table"), 5496 catalog=this.args.get("db"), 5497 ) 5498 elif isinstance(field, exp.Window): 5499 # Move the exp.Dot's to the window's function 5500 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5501 field.set("this", window_func) 5502 this = field 5503 else: 5504 this = self.expression(exp.Dot, this=this, expression=field) 5505 5506 if field and field.comments: 5507 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5508 5509 this = self._parse_bracket(this) 5510 5511 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5512 5513 def _parse_primary(self) -> t.Optional[exp.Expression]: 5514 if self._match_set(self.PRIMARY_PARSERS): 5515 token_type = 
self._prev.token_type 5516 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5517 5518 if token_type == TokenType.STRING: 5519 expressions = [primary] 5520 while self._match(TokenType.STRING): 5521 expressions.append(exp.Literal.string(self._prev.text)) 5522 5523 if len(expressions) > 1: 5524 return self.expression(exp.Concat, expressions=expressions) 5525 5526 return primary 5527 5528 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5529 return exp.Literal.number(f"0.{self._prev.text}") 5530 5531 if self._match(TokenType.L_PAREN): 5532 comments = self._prev_comments 5533 query = self._parse_select() 5534 5535 if query: 5536 expressions = [query] 5537 else: 5538 expressions = self._parse_expressions() 5539 5540 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5541 5542 if not this and self._match(TokenType.R_PAREN, advance=False): 5543 this = self.expression(exp.Tuple) 5544 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5545 this = self._parse_subquery(this=this, parse_alias=False) 5546 elif isinstance(this, exp.Subquery): 5547 this = self._parse_subquery( 5548 this=self._parse_set_operations(this), parse_alias=False 5549 ) 5550 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5551 this = self.expression(exp.Tuple, expressions=expressions) 5552 else: 5553 this = self.expression(exp.Paren, this=this) 5554 5555 if this: 5556 this.add_comments(comments) 5557 5558 self._match_r_paren(expression=this) 5559 return this 5560 5561 return None 5562 5563 def _parse_field( 5564 self, 5565 any_token: bool = False, 5566 tokens: t.Optional[t.Collection[TokenType]] = None, 5567 anonymous_func: bool = False, 5568 ) -> t.Optional[exp.Expression]: 5569 if anonymous_func: 5570 field = ( 5571 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5572 or self._parse_primary() 5573 ) 5574 else: 5575 field = self._parse_primary() or self._parse_function( 5576 anonymous=anonymous_func, any_token=any_token 5577 ) 5578 return 
field or self._parse_id_var(any_token=any_token, tokens=tokens) 5579 5580 def _parse_function( 5581 self, 5582 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5583 anonymous: bool = False, 5584 optional_parens: bool = True, 5585 any_token: bool = False, 5586 ) -> t.Optional[exp.Expression]: 5587 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5588 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5589 fn_syntax = False 5590 if ( 5591 self._match(TokenType.L_BRACE, advance=False) 5592 and self._next 5593 and self._next.text.upper() == "FN" 5594 ): 5595 self._advance(2) 5596 fn_syntax = True 5597 5598 func = self._parse_function_call( 5599 functions=functions, 5600 anonymous=anonymous, 5601 optional_parens=optional_parens, 5602 any_token=any_token, 5603 ) 5604 5605 if fn_syntax: 5606 self._match(TokenType.R_BRACE) 5607 5608 return func 5609 5610 def _parse_function_call( 5611 self, 5612 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5613 anonymous: bool = False, 5614 optional_parens: bool = True, 5615 any_token: bool = False, 5616 ) -> t.Optional[exp.Expression]: 5617 if not self._curr: 5618 return None 5619 5620 comments = self._curr.comments 5621 token = self._curr 5622 token_type = self._curr.token_type 5623 this = self._curr.text 5624 upper = this.upper() 5625 5626 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5627 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5628 self._advance() 5629 return self._parse_window(parser(self)) 5630 5631 if not self._next or self._next.token_type != TokenType.L_PAREN: 5632 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5633 self._advance() 5634 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5635 5636 return None 5637 5638 if any_token: 5639 if token_type in self.RESERVED_TOKENS: 5640 return None 5641 elif token_type not in self.FUNC_TOKENS: 5642 return None 5643 5644 self._advance(2) 5645 
5646 parser = self.FUNCTION_PARSERS.get(upper) 5647 if parser and not anonymous: 5648 this = parser(self) 5649 else: 5650 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5651 5652 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5653 this = self.expression( 5654 subquery_predicate, comments=comments, this=self._parse_select() 5655 ) 5656 self._match_r_paren() 5657 return this 5658 5659 if functions is None: 5660 functions = self.FUNCTIONS 5661 5662 function = functions.get(upper) 5663 known_function = function and not anonymous 5664 5665 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5666 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5667 5668 post_func_comments = self._curr and self._curr.comments 5669 if known_function and post_func_comments: 5670 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5671 # call we'll construct it as exp.Anonymous, even if it's "known" 5672 if any( 5673 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5674 for comment in post_func_comments 5675 ): 5676 known_function = False 5677 5678 if alias and known_function: 5679 args = self._kv_to_prop_eq(args) 5680 5681 if known_function: 5682 func_builder = t.cast(t.Callable, function) 5683 5684 if "dialect" in func_builder.__code__.co_varnames: 5685 func = func_builder(args, dialect=self.dialect) 5686 else: 5687 func = func_builder(args) 5688 5689 func = self.validate_expression(func, args) 5690 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5691 func.meta["name"] = this 5692 5693 this = func 5694 else: 5695 if token_type == TokenType.IDENTIFIER: 5696 this = exp.Identifier(this=this, quoted=True).update_positions(token) 5697 this = self.expression(exp.Anonymous, this=this, expressions=args) 5698 5699 if isinstance(this, exp.Expression): 5700 this.add_comments(comments) 5701 5702 self._match_r_paren(this) 5703 return self._parse_window(this) 5704 5705 def 
_to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5706 return expression 5707 5708 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5709 transformed = [] 5710 5711 for index, e in enumerate(expressions): 5712 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5713 if isinstance(e, exp.Alias): 5714 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5715 5716 if not isinstance(e, exp.PropertyEQ): 5717 e = self.expression( 5718 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5719 ) 5720 5721 if isinstance(e.this, exp.Column): 5722 e.this.replace(e.this.this) 5723 else: 5724 e = self._to_prop_eq(e, index) 5725 5726 transformed.append(e) 5727 5728 return transformed 5729 5730 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5731 return self._parse_statement() 5732 5733 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5734 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5735 5736 def _parse_user_defined_function( 5737 self, kind: t.Optional[TokenType] = None 5738 ) -> t.Optional[exp.Expression]: 5739 this = self._parse_table_parts(schema=True) 5740 5741 if not self._match(TokenType.L_PAREN): 5742 return this 5743 5744 expressions = self._parse_csv(self._parse_function_parameter) 5745 self._match_r_paren() 5746 return self.expression( 5747 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5748 ) 5749 5750 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5751 literal = self._parse_primary() 5752 if literal: 5753 return self.expression(exp.Introducer, this=token.text, expression=literal) 5754 5755 return self._identifier_expression(token) 5756 5757 def _parse_session_parameter(self) -> exp.SessionParameter: 5758 kind = None 5759 this = self._parse_id_var() or self._parse_primary() 5760 5761 if this and 
self._match(TokenType.DOT): 5762 kind = this.name 5763 this = self._parse_var() or self._parse_primary() 5764 5765 return self.expression(exp.SessionParameter, this=this, kind=kind) 5766 5767 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5768 return self._parse_id_var() 5769 5770 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5771 index = self._index 5772 5773 if self._match(TokenType.L_PAREN): 5774 expressions = t.cast( 5775 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5776 ) 5777 5778 if not self._match(TokenType.R_PAREN): 5779 self._retreat(index) 5780 else: 5781 expressions = [self._parse_lambda_arg()] 5782 5783 if self._match_set(self.LAMBDAS): 5784 return self.LAMBDAS[self._prev.token_type](self, expressions) 5785 5786 self._retreat(index) 5787 5788 this: t.Optional[exp.Expression] 5789 5790 if self._match(TokenType.DISTINCT): 5791 this = self.expression( 5792 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5793 ) 5794 else: 5795 this = self._parse_select_or_expression(alias=alias) 5796 5797 return self._parse_limit( 5798 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5799 ) 5800 5801 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5802 index = self._index 5803 if not self._match(TokenType.L_PAREN): 5804 return this 5805 5806 # Disambiguate between schema and subquery/CTE, e.g. 
# in INSERT INTO table (<expr>),
    # expr can be of both types
    if self._match_set(self.SELECT_START_TOKENS):
        self._retreat(index)
        return this
    args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
    self._match_r_paren()
    return self.expression(exp.Schema, this=this, expressions=args)

def _parse_field_def(self) -> t.Optional[exp.Expression]:
    """Parse a field (any token accepted) and hand it to `_parse_column_def`."""
    return self._parse_column_def(self._parse_field(any_token=True))

def _parse_column_def(
    self, this: t.Optional[exp.Expression], computed_column: bool = True
) -> t.Optional[exp.Expression]:
    """
    Parse what follows a column name: an optional type, computed / transform
    expressions and any trailing column constraints.

    Args:
        this: The already-parsed column name. An `exp.Column` is unwrapped to its
            inner `this`.
        computed_column: When False, an ALIAS token is consumed up front, before
            the type is parsed.

    Returns:
        An `exp.ColumnDef`, or `this` unchanged when neither a type nor any
        constraint was parsed.
    """
    # column defs are not really columns, they're identifiers
    if isinstance(this, exp.Column):
        this = this.this

    if not computed_column:
        self._match(TokenType.ALIAS)

    kind = self._parse_types(schema=True)

    if self._match_text_seq("FOR", "ORDINALITY"):
        return self.expression(exp.ColumnDef, this=this, ordinality=True)

    constraints: t.List[exp.Expression] = []

    # Computed column: an ALIAS token when no type was parsed, or the literal
    # words ALIAS / MATERIALIZED (the latter marks the column as persisted).
    if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
        ("ALIAS", "MATERIALIZED")
    ):
        persisted = self._prev.text.upper() == "MATERIALIZED"
        constraint_kind = exp.ComputedColumnConstraint(
            this=self._parse_assignment(),
            persisted=persisted or self._match_text_seq("PERSISTED"),
            not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
        )
        constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind))
    elif (
        kind
        and self._match(TokenType.ALIAS, advance=False)
        and (
            not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
            or (self._next and self._next.token_type == TokenType.L_PAREN)
        )
    ):
        # Typed column followed by ALIAS: a transform constraint. The ALIAS was only
        # peeked at above (advance=False), so it is consumed here.
        self._advance()
        constraints.append(
            self.expression(
                exp.ColumnConstraint,
                kind=exp.TransformColumnConstraint(this=self._parse_disjunction()),
            )
        )

    # Collect column constraints until none can be parsed
    while True:
        constraint = self._parse_column_constraint()
        if not constraint:
            break
        constraints.append(constraint)

    if not kind and not constraints:
        return this

    return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

def _parse_auto_increment(
    self,
) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
    """
    Parse the optional arguments of an auto-increment constraint.

    Accepts either a parenthesized `(start, increment)` list or the
    `START <expr> [INCREMENT] <expr>` form. When both values are present a
    `GeneratedAsIdentityColumnConstraint` is returned, otherwise a plain
    `AutoIncrementColumnConstraint`.
    """
    start = None
    increment = None

    if self._match(TokenType.L_PAREN, advance=False):
        args = self._parse_wrapped_csv(self._parse_bitwise)
        start = seq_get(args, 0)
        increment = seq_get(args, 1)
    elif self._match_text_seq("START"):
        start = self._parse_bitwise()
        self._match_text_seq("INCREMENT")
        increment = self._parse_bitwise()

    if start and increment:
        return exp.GeneratedAsIdentityColumnConstraint(
            start=start, increment=increment, this=False
        )

    return exp.AutoIncrementColumnConstraint()

def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
    """
    Parse `REFRESH <var>` into an `exp.AutoRefreshProperty`.

    If REFRESH does not follow, the parser steps back one token and None is returned.
    """
    if not self._match_text_seq("REFRESH"):
        # NOTE(review): presumably un-consumes the keyword that dispatched here - confirm
        self._retreat(self._index - 1)
        return None
    return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

def _parse_compress(self) -> exp.CompressColumnConstraint:
    """Parse the argument of a compress constraint: a parenthesized list or a single expression."""
    if self._match(TokenType.L_PAREN, advance=False):
        return self.expression(
            exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
        )

    return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

def _parse_generated_as_identity(
    self,
) -> (
    exp.GeneratedAsIdentityColumnConstraint
    | exp.ComputedColumnConstraint
    | exp.GeneratedAsRowColumnConstraint
):
    """
    Parse the remainder of a generated-column constraint.

    Handles `BY DEFAULT [ON NULL]` / `ALWAYS`, the `ROW START | END [HIDDEN]` form
    (returned as a `GeneratedAsRowColumnConstraint`), and an optional parenthesized
    section holding either identity options (start, increment, min/max value, cycle)
    or, when IDENTITY was not matched, a generation expression.
    """
    if self._match_text_seq("BY", "DEFAULT"):
        on_null = self._match_pair(TokenType.ON, TokenType.NULL)
        this = self.expression(
            exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
        )
    else:
        self._match_text_seq("ALWAYS")
        this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

    self._match(TokenType.ALIAS)

    if self._match_text_seq("ROW"):
        start = self._match_text_seq("START")
        if not start:
            self._match(TokenType.END)
        hidden = self._match_text_seq("HIDDEN")
        return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

    identity = self._match_text_seq("IDENTITY")

    if self._match(TokenType.L_PAREN):
        if self._match(TokenType.START_WITH):
            this.set("start", self._parse_bitwise())
        if self._match_text_seq("INCREMENT", "BY"):
            this.set("increment", self._parse_bitwise())
        if self._match_text_seq("MINVALUE"):
            this.set("minvalue", self._parse_bitwise())
        if self._match_text_seq("MAXVALUE"):
            this.set("maxvalue", self._parse_bitwise())

        if self._match_text_seq("CYCLE"):
            this.set("cycle", True)
        elif self._match_text_seq("NO", "CYCLE"):
            this.set("cycle", False)

        if not identity:
            # Without IDENTITY the parenthesized content is stored as the expression
            this.set("expression", self._parse_range())
        elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
            # Positional form: the first value is the start, the second the increment
            args = self._parse_csv(self._parse_bitwise)
            this.set("start", seq_get(args, 0))
            this.set("increment", seq_get(args, 1))

        self._match_r_paren()

    return this

def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
    """Parse `[LENGTH] <expr>` into an `exp.InlineLengthColumnConstraint`."""
    self._match_text_seq("LENGTH")
    return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
    """
    Parse the constraint that follows an already-consumed `NOT`.

    Recognizes NULL, CASESPECIFIC and FOR REPLICATION. If none of them follows,
    the parser steps back one token and None is returned.
    """
    if self._match_text_seq("NULL"):
        return self.expression(exp.NotNullColumnConstraint)
    if self._match_text_seq("CASESPECIFIC"):
        return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
    if self._match_text_seq("FOR", "REPLICATION"):
        return self.expression(exp.NotForReplicationColumnConstraint)

    # Unconsume the `NOT` token
    self._retreat(self._index - 1)
    return None

def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
    """
    Parse a single, optionally named (`CONSTRAINT <name>`), column constraint.

    Returns an `exp.ColumnConstraint` when a keyword from `CONSTRAINT_PARSERS`
    follows; otherwise returns whatever the optional name parse produced.
    """
    this = self._match(TokenType.CONSTRAINT) and self._parse_id_var()

    # A WITH followed by one of PROCEDURE_OPTIONS is not treated as a constraint
    procedure_option_follows = (
        self._match(TokenType.WITH, advance=False)
        and self._next
        and self._next.text.upper() in self.PROCEDURE_OPTIONS
    )

    if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS):
        return self.expression(
            exp.ColumnConstraint,
            this=this,
            kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
        )

    return this

def _parse_constraint(self) -> t.Optional[exp.Expression]:
    """
    Parse a schema-level constraint.

    With a leading CONSTRAINT token, a named `exp.Constraint` wrapping the
    following unnamed constraints is built; otherwise a single unnamed constraint
    restricted to `SCHEMA_UNNAMED_CONSTRAINTS` is parsed.
    """
    if not self._match(TokenType.CONSTRAINT):
        return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

    return self.expression(
        exp.Constraint,
        this=self._parse_id_var(),
        expressions=self._parse_unnamed_constraints(),
    )

def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
    """Collect consecutive unnamed constraints (or function calls) until neither parses."""
    constraints = []
    while True:
        constraint = self._parse_unnamed_constraint() or self._parse_function()
        if not constraint:
            break
        constraints.append(constraint)

    return constraints

def _parse_unnamed_constraint(
    self, constraints: t.Optional[t.Collection[str]] = None
) -> t.Optional[exp.Expression]:
    """
    Parse one unnamed constraint by dispatching on its keyword.

    Args:
        constraints: Keywords to accept. Falls back to all `CONSTRAINT_PARSERS` keys.

    Returns:
        The result of the matching parser, or None when the current token is an
        identifier or no accepted keyword follows.
    """
    if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
        constraints or self.CONSTRAINT_PARSERS
    ):
        return None

    constraint = self._prev.text.upper()
    # A caller-supplied keyword may have no registered parser
    if constraint not in self.CONSTRAINT_PARSERS:
        self.raise_error(f"No parser found for schema constraint {constraint}.")

    return self.CONSTRAINT_PARSERS[constraint](self)

def _parse_unique_key(self) -> t.Optional[exp.Expression]:
    """Parse the optional key name of a UNIQUE constraint (called with `any_token=False`)."""
    return self._parse_id_var(any_token=False)

def _parse_unique(self) -> exp.UniqueColumnConstraint:
    """
    Parse the remainder of a UNIQUE constraint.

    In order: optional KEY, optional NULLS NOT DISTINCT, the key name / column
    schema, optional `USING <index type>`, an ON CONFLICT clause and trailing key
    constraint options. The keyword arguments below are evaluated in that order,
    which is the order tokens are consumed.
    """
    self._match_text_seq("KEY")
    return self.expression(
        exp.UniqueColumnConstraint,
        nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"),
        this=self._parse_schema(self._parse_unique_key()),
        index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
        on_conflict=self._parse_on_conflict(),
        options=self._parse_key_constraint_options(),
    )

def _parse_key_constraint_options(self) -> t.List[str]:
    """
    Parse trailing key constraint options into a list of strings.

    `ON <event> <action>` clauses are rendered as `"ON {event} {action}"`; anything
    else is looked up in `KEY_CONSTRAINT_OPTIONS`. Parsing stops at the end of input
    or at the first token that matches neither form.
    """
    options = []
    while True:
        if not self._curr:
            break

        if self._match(TokenType.ON):
            action = None
            # The token right after ON is taken verbatim as the event name
            on = self._advance_any() and self._prev.text

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match_text_seq("CASCADE"):
                action = "CASCADE"
            elif self._match_text_seq("RESTRICT"):
                action = "RESTRICT"
            elif self._match_pair(TokenType.SET, TokenType.NULL):
                action = "SET NULL"
            elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                action = "SET DEFAULT"
            else:
                self.raise_error("Invalid key constraint")

            options.append(f"ON {on} {action}")
        else:
            var = self._parse_var_from_options(
                self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False
            )
            if not var:
                break
            options.append(var.name)

    return options

def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
    """
    Parse a REFERENCES clause: the referenced table (schema form) plus key options.

    Args:
        match: When True the REFERENCES token must be matched first, and None is
            returned if it is absent.
    """
    if match and not self._match(TokenType.REFERENCES):
        return None

    # `expressions` is always passed as None here
    expressions = None
    this = self._parse_table(schema=True)
    options = self._parse_key_constraint_options()
    return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

def _parse_foreign_key(self) -> exp.ForeignKey:
    """
    Parse the remainder of a FOREIGN KEY constraint.

    Consumes the wrapped column list, an optional REFERENCES clause, any number of
    `ON DELETE | UPDATE <action>` clauses and trailing key constraint options. Each
    ON clause is forwarded as a keyword argument named after the lower-cased event
    (`delete` / `update`).
    """
    expressions = self._parse_wrapped_id_vars()
    reference = self._parse_references()
    on_options = {}

    while self._match(TokenType.ON):
        if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
            self.raise_error("Expected DELETE or UPDATE")

        kind = self._prev.text.lower()

        if self._match_text_seq("NO", "ACTION"):
            action = "NO ACTION"
        elif self._match(TokenType.SET):
            self._match_set((TokenType.NULL, TokenType.DEFAULT))
            action = "SET " + self._prev.text.upper()
        else:
            # Any other single token is taken verbatim as the action
            self._advance()
            action = self._prev.text.upper()

        on_options[kind] = action

    return self.expression(
        exp.ForeignKey,
        expressions=expressions,
        reference=reference,
        options=self._parse_key_constraint_options(),
        **on_options,  # type: ignore
    )

def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
    """Parse one element of a primary key list: an ordered expression, else a field."""
    return self._parse_ordered() or self._parse_field()

def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
    """
    Parse a wrapped pair of identifiers into an `exp.PeriodForSystemTimeConstraint`.

    If the `TIMESTAMP_SNAPSHOT` token does not follow, the parser steps back one
    token and None is returned.
    """
    if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
        self._retreat(self._index - 1)
        return None

    id_vars = self._parse_wrapped_id_vars()
    return self.expression(
        exp.PeriodForSystemTimeConstraint,
        this=seq_get(id_vars, 0),
        expression=seq_get(id_vars, 1),
    )

def _parse_primary_key(
    self, wrapped_optional: bool = False, in_props: bool = False
) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
    """
    Parse the remainder of a PRIMARY KEY constraint.

    Args:
        wrapped_optional: Forwarded as `optional` to `_parse_wrapped_csv` for the key list.
        in_props: When True, the key list form is always parsed, even if no
            opening parenthesis follows.

    Returns:
        An `exp.PrimaryKeyColumnConstraint` for the column-level form (no key list),
        otherwise an `exp.PrimaryKey` holding the parsed key parts.
    """
    desc = (
        self._match_set((TokenType.ASC, TokenType.DESC))
        and self._prev.token_type == TokenType.DESC
    )

    if not in_props and not self._match(TokenType.L_PAREN, advance=False):
        return self.expression(
            exp.PrimaryKeyColumnConstraint,
            desc=desc,
            options=self._parse_key_constraint_options(),
        )

    expressions = self._parse_wrapped_csv(
        self._parse_primary_key_part, optional=wrapped_optional
    )
    options = self._parse_key_constraint_options()
    return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
    """
    Parse one entry inside brackets / braces: an explicitly aliased assignment,
    optionally followed by a slice.

    `is_map` is not used by this implementation.
    """
    return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

def _parse_odbc_datetime_literal(self) -> exp.Expression:
    """
    Parses a datetime column in ODBC format. We parse the column into the corresponding
    types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the
    same as we did for `DATE('yyyy-mm-dd')`.

    Reference:
    https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals
    """
    self._match(TokenType.VAR)
    # The VAR text selects the expression class
    exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()]
    expression = self.expression(exp_class=exp_class, this=self._parse_string())
    if not self._match(TokenType.R_BRACE):
        self.raise_error("Expected }")
    return expression

def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
    """
    Parse a `[...]` or `{...}` construct that follows `this`.

    Depending on the opening token and on `this`, the result is an ODBC datetime
    literal, an `exp.Struct` (braces), an array constructor, or an `exp.Bracket`
    subscript. Recurses so that chained brackets are handled. Returns `this`
    unchanged if no opening bracket or brace follows.
    """
    if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
        return this

    bracket_kind = self._prev.token_type
    if (
        bracket_kind == TokenType.L_BRACE
        and self._curr
        and self._curr.token_type == TokenType.VAR
        and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS
    ):
        return self._parse_odbc_datetime_literal()

    expressions = self._parse_csv(
        lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
    )

    if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
        self.raise_error("Expected ]")
    elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
        self.raise_error("Expected }")

    # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
    if bracket_kind == TokenType.L_BRACE:
        this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
    elif not this:
        this = build_array_constructor(
            exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect
        )
    else:
        # A name registered in ARRAY_CONSTRUCTORS makes this a constructor call
        constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
        if constructor_type:
            return build_array_constructor(
                constructor_type,
                args=expressions,
                bracket_kind=bracket_kind,
                dialect=self.dialect,
            )

        # Otherwise it is a subscript; indices are shifted by the negated dialect offset
        expressions = apply_index_offset(
            this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect
        )
        this = self.expression(exp.Bracket, this=this, expressions=expressions)

    self._add_comments(this)
    return self._parse_bracket(this)

def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    """Wrap `this` in an `exp.Slice` if a colon follows; otherwise return it unchanged."""
    if self._match(TokenType.COLON):
        return self.expression(exp.Slice, this=this, expression=self._parse_assignment())
    return this

def _parse_case(self) -> t.Optional[exp.Expression]:
    """
    Parse the body of a CASE expression: optional operand, WHEN/THEN branches,
    optional ELSE and the closing END.
    """
    ifs = []
    default = None

    comments = self._prev_comments
    expression = self._parse_assignment()

    while self._match(TokenType.WHEN):
        this = self._parse_assignment()
        self._match(TokenType.THEN)
        then = self._parse_assignment()
        ifs.append(self.expression(exp.If, this=this, true=then))

    if self._match(TokenType.ELSE):
        default = self._parse_assignment()

    if not self._match(TokenType.END):
        # If the ELSE value was parsed as an Interval whose operand renders as END,
        # the END keyword was swallowed by it; treat the default as a column named
        # `interval` instead.
        if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
            default = exp.column("interval")
        else:
            self.raise_error("Expected END after CASE", self._prev)

    return self.expression(
        exp.Case, comments=comments, this=expression, ifs=ifs, default=default
    )

def _parse_if(self) -> t.Optional[exp.Expression]:
    """
    Parse an IF in either function form `IF(<args>)` or statement form
    `IF <cond> [THEN] <expr> [ELSE <expr>] [END]`.

    In statement form, returns None (after retreating) when no condition can be
    parsed, and falls back to `_parse_as_command` when `NO_PAREN_IF_COMMANDS` is
    set and the token before the current position is the first token.
    """
    if self._match(TokenType.L_PAREN):
        args = self._parse_csv(
            lambda: self._parse_alias(self._parse_assignment(), explicit=True)
        )
        this = self.validate_expression(exp.If.from_arg_list(args), args)
        self._match_r_paren()
    else:
        # Index of the previous token, used to rewind if parsing fails
        index = self._index - 1

        if self.NO_PAREN_IF_COMMANDS and index == 0:
            return self._parse_as_command(self._prev)

        condition = self._parse_assignment()

        if not condition:
            self._retreat(index)
            return None

        self._match(TokenType.THEN)
        true = self._parse_assignment()
        false = self._parse_assignment() if self._match(TokenType.ELSE) else None
        self._match(TokenType.END)
        this = self.expression(exp.If, this=condition, true=true, false=false)

    return this

def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
    """
    Parse `VALUE FOR <column> [OVER (<order>)]` into an `exp.NextValueFor`.

    If `VALUE FOR` does not follow, the parser steps back one token and None is returned.
    """
    if not self._match_text_seq("VALUE", "FOR"):
        self._retreat(self._index - 1)
        return None

    return self.expression(
        exp.NextValueFor,
        this=self._parse_column(),
        order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
    )

def _parse_extract(self) -> exp.Extract | exp.Anonymous:
    """
    Parse the arguments of EXTRACT: `<part> FROM <expr>` or `<part>, <expr>`.

    An error is reported when neither FROM nor a comma follows the part.
    """
    this = self._parse_function() or self._parse_var_or_string(upper=True)

    if self._match(TokenType.FROM):
        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    if not self._match(TokenType.COMMA):
        self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

    return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

def _parse_gap_fill(self) -> exp.GapFill:
    """Parse `[TABLE] <table>[,] <args...>` into a validated `exp.GapFill`."""
    self._match(TokenType.TABLE)
    this = self._parse_table()

    self._match(TokenType.COMMA)
    args = [this, *self._parse_csv(self._parse_lambda)]

    gap_fill = exp.GapFill.from_arg_list(args)
    return self.validate_expression(gap_fill, args)

def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
    """
    Parse the arguments of a CAST-like function.

    Args:
        strict: Selects `exp.Cast` (True) or `exp.TryCast` (False) for the result.
        safe: Forwarded to the resulting expression.

    Returns:
        Usually a Cast / TryCast. A `CastToStrType` is returned for the
        `<expr>, <string>` form, and a `StrToDate` / `StrToTime` when a format is
        given and the target type is temporal.
    """
    this = self._parse_assignment()

    if not self._match(TokenType.ALIAS):
        if self._match(TokenType.COMMA):
            return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

        self.raise_error("Expected AS after CAST")

    fmt = None
    to = self._parse_types()

    # `default` first holds the match result, then the parsed default expression
    default = self._match(TokenType.DEFAULT)
    if default:
        default = self._parse_bitwise()
        self._match_text_seq("ON", "CONVERSION", "ERROR")

    if self._match_set((TokenType.FORMAT, TokenType.COMMA)):
        fmt_string = self._parse_string()
        fmt = self._parse_at_time_zone(fmt_string)

        if not to:
            to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
        if to.this in exp.DataType.TEMPORAL_TYPES:
            # A formatted cast to a temporal type becomes a string-to-date/time conversion
            this = self.expression(
                exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                this=this,
                format=exp.Literal.string(
                    format_time(
                        fmt_string.this if fmt_string else "",
                        self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                        self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                    )
                ),
                safe=safe,
            )

            if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                this.set("zone", fmt.args["zone"])
            return this
    elif not to:
        self.raise_error("Expected TYPE after CAST")
    elif isinstance(to, exp.Identifier):
        # An identifier in type position is built as a user-defined type
        to = exp.DataType.build(to.name, udt=True)
    elif to.this == exp.DataType.Type.CHAR:
        if self._match(TokenType.CHARACTER_SET):
            to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

    return self.expression(
        exp.Cast if strict else exp.TryCast,
        this=this,
        to=to,
        format=fmt,
        safe=safe,
        action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
        default=default,
    )

def _parse_string_agg(self) -> exp.GroupConcat:
    """
    Parse the arguments of a string aggregation into an `exp.GroupConcat`.

    Supports a leading DISTINCT, an `ON OVERFLOW` clause, an inline ORDER BY /
    LIMIT before the closing parenthesis, and a trailing `WITHIN GROUP (ORDER BY ...)`.
    The parsed `on_overflow` value is attached only on the WITHIN GROUP path.
    """
    if self._match(TokenType.DISTINCT):
        args: t.List[t.Optional[exp.Expression]] = [
            self.expression(exp.Distinct, expressions=[self._parse_assignment()])
        ]
        if self._match(TokenType.COMMA):
            args.extend(self._parse_csv(self._parse_assignment))
    else:
        args = self._parse_csv(self._parse_assignment)  # type: ignore

    if self._match_text_seq("ON", "OVERFLOW"):
        # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior])
        if self._match_text_seq("ERROR"):
            on_overflow: t.Optional[exp.Expression] = exp.var("ERROR")
        else:
            self._match_text_seq("TRUNCATE")
            on_overflow = self.expression(
                exp.OverflowTruncateBehavior,
                this=self._parse_string(),
                # True for WITH COUNT, and also when WITHOUT COUNT is absent
                with_count=(
                    self._match_text_seq("WITH", "COUNT")
                    or not self._match_text_seq("WITHOUT", "COUNT")
                ),
            )
    else:
        on_overflow = None

    # Remember the position so a consumed `)` can be given back below
    index = self._index
    if not self._match(TokenType.R_PAREN) and args:
        # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
        # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
        # The order is parsed through `this` as a canonicalization for WITHIN GROUPs
        args[0] = self._parse_limit(this=self._parse_order(this=args[0]))
        return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

    # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
    # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
    # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
    if not self._match_text_seq("WITHIN", "GROUP"):
        self._retreat(index)
        return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

    # The corresponding match_r_paren will be called in parse_function (caller)
    self._match_l_paren()

    return self.expression(
        exp.GroupConcat,
        this=self._parse_order(this=seq_get(args, 0)),
        separator=seq_get(args, 1),
        on_overflow=on_overflow,
    )

def _parse_convert(
    self, strict: bool, safe: t.Optional[bool] = None
) -> t.Optional[exp.Expression]:
    """
    Parse the arguments of CONVERT: `<expr> USING <charset>` or `<expr>, <type>`.

    Args:
        strict: Selects `exp.Cast` (True) or `exp.TryCast` (False) for the result.
        safe: Forwarded to the resulting expression.
    """
    this = self._parse_bitwise()

    if self._match(TokenType.USING):
        to: t.Optional[exp.Expression] = self.expression(
            exp.CharacterSet, this=self._parse_var()
        )
    elif self._match(TokenType.COMMA):
        to = self._parse_types()
    else:
        to = None

    return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

def _parse_xml_table(self) -> exp.XMLTable:
    """
    Parse the arguments of XMLTABLE: optional XMLNAMESPACES, the row string,
    optional PASSING, `RETURNING SEQUENCE BY REF` and COLUMNS clauses.
    """
    namespaces = None
    passing = None
    columns = None

    if self._match_text_seq("XMLNAMESPACES", "("):
        namespaces = self._parse_xml_namespace()
        self._match_text_seq(")", ",")

    this = self._parse_string()

    if self._match_text_seq("PASSING"):
        # The BY VALUE keywords are optional and are provided for semantic clarity
        self._match_text_seq("BY", "VALUE")
        passing = self._parse_csv(self._parse_column)

    by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF")

    if self._match_text_seq("COLUMNS"):
        columns = self._parse_csv(self._parse_field_def)

    return self.expression(
        exp.XMLTable,
        this=this,
        namespaces=namespaces,
        passing=passing,
        columns=columns,
        by_ref=by_ref,
    )

def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]:
    """
    Parse a comma-separated list of XML namespace declarations.

    Each entry is either `DEFAULT <string>` or a string with an optional alias.
    """
    namespaces = []

    while True:
        if self._match(TokenType.DEFAULT):
            uri = self._parse_string()
        else:
            uri = self._parse_alias(self._parse_string())
        namespaces.append(self.expression(exp.XMLNamespace, this=uri))
        if not self._match(TokenType.COMMA):
            break

    return namespaces

def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
    """
    There are generally two variants of the DECODE function:

    - DECODE(bin, charset)
    - DECODE(expression, search, result [, search, result] ... [, default])

    The second variant will always be parsed into a CASE expression. Note that NULL
    needs special treatment, since we need to explicitly check for it with `IS NULL`,
    instead of relying on pattern matching.
    """
    args = self._parse_csv(self._parse_assignment)

    # Fewer than three arguments: the DECODE(bin, charset) variant
    if len(args) < 3:
        return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

    expression, *expressions = args
    if not expression:
        return None

    ifs = []
    # Pair up (search, result); an unpaired trailing argument becomes the default below
    for search, result in zip(expressions[::2], expressions[1::2]):
        if not search or not result:
            return None

        if isinstance(search, exp.Literal):
            ifs.append(
                exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
            )
        elif isinstance(search, exp.Null):
            ifs.append(
                exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
            )
        else:
            # Arbitrary search expression: match on equality or when both sides are NULL
            cond = exp.or_(
                exp.EQ(this=expression.copy(), expression=search),
                exp.and_(
                    exp.Is(this=expression.copy(), expression=exp.Null()),
                    exp.Is(this=search.copy(), expression=exp.Null()),
                    copy=False,
                ),
                copy=False,
            )
            ifs.append(exp.If(this=cond, true=result))

    return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
    """
    Parse `[KEY] <key> [<separator>] [VALUE] <value>` into an `exp.JSONKeyValue`.

    Returns None when neither a key nor a value could be parsed.
    """
    self._match_text_seq("KEY")
    key = self._parse_column()
    self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
    self._match_text_seq("VALUE")
    value = self._parse_bitwise()

    if not key and not value:
        return None
    return self.expression(exp.JSONKeyValue, this=key, expression=value)

def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    """Wrap `this` in an `exp.FormatJson` if `FORMAT JSON` follows; otherwise return it as is."""
    if not this or not self._match_text_seq("FORMAT", "JSON"):
        return this

    return self.expression(exp.FormatJson, this=this)

def _parse_on_condition(self) -> t.Optional[exp.OnCondition]:
    """
    Parse `... ON EMPTY`, `... ON ERROR` and `... ON NULL` handlers into an
    `exp.OnCondition`, or return None when none of them is present.
    """
    # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS)
    if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR:
        empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)
        error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
    else:
        error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
        empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)

    null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS)

    if not empty and not error and not null:
        return None

    return self.expression(
        exp.OnCondition,
        empty=empty,
        error=error,
        null=null,
    )

def _parse_on_handling(
    self, on: str, *values: str
) -> t.Optional[str] | t.Optional[exp.Expression]:
    """
    Parse a `<value> ON <on>` or `DEFAULT <expr> ON <on>` handler.

    Args:
        on: The condition keyword expected after ON (e.g. "NULL", "ERROR", "EMPTY").
        *values: Keywords accepted in the `<value>` position.

    Returns:
        The string `"<value> ON <on>"` for a keyword match, the parsed expression
        for the DEFAULT form, or None (with the position restored) when nothing matches.
    """
    # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL)
    for value in values:
        if self._match_text_seq(value, "ON", on):
            return f"{value} ON {on}"

    index = self._index
    if self._match(TokenType.DEFAULT):
        default_value = self._parse_bitwise()
        if self._match_text_seq("ON", on):
            return default_value

    # Nothing matched: give back anything the DEFAULT branch consumed
    self._retreat(index)

    return None

@t.overload
def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

@t.overload
def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...
def _parse_json_object(self, agg=False):
    """
    Parse the arguments of a JSON object constructor.

    Builds an `exp.JSONObjectAgg` when `agg` is truthy, otherwise an `exp.JSONObject`.
    """
    wildcard = self._parse_star()
    if wildcard:
        members = [wildcard]
    else:
        members = self._parse_csv(
            lambda: self._parse_format_json(self._parse_json_key_value())
        )

    on_null = self._parse_on_handling("NULL", "NULL", "ABSENT")

    # Tri-state: True for WITH UNIQUE, False for WITHOUT UNIQUE, None when neither appears
    if self._match_text_seq("WITH", "UNIQUE"):
        uniqueness = True
    elif self._match_text_seq("WITHOUT", "UNIQUE"):
        uniqueness = False
    else:
        uniqueness = None

    # The KEYS keyword is consumed if present
    self._match_text_seq("KEYS")

    returning = self._match_text_seq("RETURNING") and self._parse_format_json(
        self._parse_type()
    )
    charset = self._match_text_seq("ENCODING") and self._parse_var()

    node_type = exp.JSONObjectAgg if agg else exp.JSONObject
    return self.expression(
        node_type,
        expressions=members,
        null_handling=on_null,
        unique_keys=uniqueness,
        return_type=returning,
        encoding=charset,
    )

# Note: this is currently incomplete; it only implements the "JSON_value_column" part
def _parse_json_column_def(self) -> exp.JSONColumnDef:
    """
    Parse one column definition of a JSON schema.

    A NESTED entry carries no name or type, only an optional PATH and a nested
    schema. Any other entry is a name followed by a type and an optional PATH.
    """
    if self._match_text_seq("NESTED"):
        name = None
        column_type = None
        is_nested = True
    else:
        name = self._parse_id_var()
        column_type = self._parse_types(allow_identifiers=False)
        is_nested = None

    json_path = self._match_text_seq("PATH") and self._parse_string()
    inner_schema = is_nested and self._parse_json_schema()

    return self.expression(
        exp.JSONColumnDef,
        this=name,
        kind=column_type,
        path=json_path,
        nested_schema=inner_schema,
    )

def _parse_json_schema(self) -> exp.JSONSchema:
    """Parse `[COLUMNS] (<column defs>)` into an `exp.JSONSchema`."""
    self._match_text_seq("COLUMNS")
    column_defs = self._parse_wrapped_csv(self._parse_json_column_def, optional=True)
    return self.expression(exp.JSONSchema, expressions=column_defs)

def _parse_json_table(self) -> exp.JSONTable:
    """
    Parse the arguments of JSON_TABLE: the document, an optional path after a
    comma, optional ON ERROR / ON EMPTY handlers and the column schema.
    """
    document = self._parse_format_json(self._parse_bitwise())
    json_path = self._match(TokenType.COMMA) and self._parse_string()
    on_error = self._parse_on_handling("ERROR", "ERROR", "NULL")
    on_empty = self._parse_on_handling("EMPTY", "ERROR", "NULL")
    columns = self._parse_json_schema()

    return exp.JSONTable(
        this=document,
        schema=columns,
        path=json_path,
        error_handling=on_error,
        empty_handling=on_empty,
    )

def _parse_match_against(self) -> exp.MatchAgainst:
    """
    Parse `<columns>) AGAINST (<string> [<search modifier>]` into an `exp.MatchAgainst`.
    """
    columns = self._parse_csv(self._parse_column)

    self._match_text_seq(")", "AGAINST", "(")

    search_string = self._parse_string()

    # No modifier unless one of the recognized keyword sequences follows
    modifier = None
    if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
        modifier = "IN NATURAL LANGUAGE MODE"
        if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = f"{modifier} WITH QUERY EXPANSION"
    elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
        modifier = "IN BOOLEAN MODE"
    elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
        modifier = "WITH QUERY EXPANSION"

    return self.expression(
        exp.MatchAgainst, this=search_string, expressions=columns, modifier=modifier
    )

# https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
def _parse_open_json(self) -> exp.OpenJSON:
    """
    Parse the arguments of OPENJSON: the JSON expression, an optional path after a
    comma and, when `) WITH` follows, a list of column definitions.
    """
    document = self._parse_bitwise()
    json_path = self._match(TokenType.COMMA) and self._parse_string()

    def _column_def() -> exp.OpenJSONColumnDef:
        # One entry of the WITH list: name, type, path string and an optional ALIAS JSON pair
        name = self._parse_field(any_token=True)
        column_type = self._parse_types()
        column_path = self._parse_string()
        is_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

        return self.expression(
            exp.OpenJSONColumnDef,
            this=name,
            kind=column_type,
            path=column_path,
            as_json=is_json,
        )

    column_defs = None
    if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
        self._match_l_paren()
        column_defs = self._parse_csv(_column_def)

    return self.expression(exp.OpenJSON, this=document, path=json_path, expressions=column_defs)

def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
    """
    Parse the arguments of a position function into an `exp.StrPosition`.

    Args:
        haystack_first: For the comma-separated form, whether the first argument is
            the string being searched (True) or the substring searched for (False).
    """
    operands = self._parse_csv(self._parse_bitwise)

    if self._match(TokenType.IN):
        # `<needle> IN <haystack>` form
        searched = self._parse_bitwise()
        return self.expression(exp.StrPosition, this=searched, substr=seq_get(operands, 0))

    haystack_idx, needle_idx = (0, 1) if haystack_first else (1, 0)
    return self.expression(
        exp.StrPosition,
        this=seq_get(operands, haystack_idx),
        substr=seq_get(operands, needle_idx),
        position=seq_get(operands, 2),
    )

def _parse_predict(self) -> exp.Predict:
    """
    Parse `[MODEL] <table>[,] [TABLE] <table>[, <params>]` into an `exp.Predict`.
    """
    self._match_text_seq("MODEL")
    model = self._parse_table()

    self._match(TokenType.COMMA)
    self._match_text_seq("TABLE")
    input_table = self._parse_table()

    params = self._match(TokenType.COMMA) and self._parse_bitwise()
    return self.expression(
        exp.Predict, this=model, expression=input_table, params_struct=params
    )

def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
    """Parse a comma-separated list of tables into an `exp.JoinHint` named `func_name` upper-cased."""
    tables = self._parse_csv(self._parse_table)
    hint_name = func_name.upper()
    return exp.JoinHint(this=hint_name, expressions=tables)

def _parse_substring(self) -> exp.Substring:
    """Parse the arguments of SUBSTRING, in comma-separated or FROM / FOR form."""
    # Postgres supports the form: substring(string [from int] [for int])
    # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

    operands = t.cast(
        t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)
    )

    if self._match(TokenType.FROM):
        operands.append(self._parse_bitwise())

    if self._match(TokenType.FOR):
        # With a single operand so far, pad with a literal 1 so the FOR value
        # lands in the third position
        if len(operands) == 1:
            operands.append(exp.Literal.number(1))
        operands.append(self._parse_bitwise())

    node = exp.Substring.from_arg_list(operands)
    return self.validate_expression(node, operands)

def _parse_trim(self) -> exp.Trim:
    """
    Parse the arguments of TRIM: an optional trim type, one or two operands
    separated by FROM or a comma, and an optional COLLATE clause.
    """
    # https://www.w3resource.com/sql/character-functions/trim.php
    # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

    trim_type = self._prev.text.upper() if self._match_texts(self.TRIM_TYPES) else None

    target = self._parse_bitwise()
    pattern = None
    if self._match_set((TokenType.FROM, TokenType.COMMA)):
        # The two operands are swapped for the FROM form, or when TRIM_PATTERN_FIRST is set
        swap = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
        pattern = self._parse_bitwise()
        if swap:
            target, pattern = pattern, target

    collation = self._parse_bitwise() if self._match(TokenType.COLLATE) else None

    return self.expression(
        exp.Trim, this=target, position=trim_type, expression=pattern, collation=collation
    )

def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
    """Parse `WINDOW <named window>, ...`; returns the falsy match result when WINDOW is absent."""
    matched = self._match(TokenType.WINDOW)
    if not matched:
        return matched
    return self._parse_csv(self._parse_named_window)

def _parse_named_window(self) -> t.Optional[exp.Expression]:
    """Parse a window name and then its definition via `_parse_window(..., alias=True)`."""
    window_name = self._parse_id_var()
    return self._parse_window(window_name, alias=True)

def _parse_respect_or_ignore_nulls(
    self, this: t.Optional[exp.Expression]
) -> t.Optional[exp.Expression]:
    """Wrap `this` in `exp.IgnoreNulls` / `exp.RespectNulls` if the matching keywords follow."""
    for keyword, node_type in (("IGNORE", exp.IgnoreNulls), ("RESPECT", exp.RespectNulls)):
        if self._match_text_seq(keyword, "NULLS"):
            return self.expression(node_type, this=this)
    return this

def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    """Wrap `this` in an `exp.HavingMax` if `HAVING [MAX | MIN] <column>` follows."""
    if not self._match(TokenType.HAVING):
        return this

    self._match_texts(("MAX", "MIN"))
    # Treated as MAX unless the previously consumed token reads MIN
    is_max = self._prev.text.upper() != "MIN"
    column = self._parse_column()
    return self.expression(exp.HavingMax, this=this, expression=column, max=is_max)

def _parse_window(
    self, this: t.Optional[exp.Expression], alias: bool = False
) -> t.Optional[exp.Expression]:
    func = this
    comments = func.comments if isinstance(func, exp.Expression) else None

    # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
6816 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6817 if self._match_text_seq("WITHIN", "GROUP"): 6818 order = self._parse_wrapped(self._parse_order) 6819 this = self.expression(exp.WithinGroup, this=this, expression=order) 6820 6821 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6822 self._match(TokenType.WHERE) 6823 this = self.expression( 6824 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6825 ) 6826 self._match_r_paren() 6827 6828 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6829 # Some dialects choose to implement and some do not. 6830 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6831 6832 # There is some code above in _parse_lambda that handles 6833 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6834 6835 # The below changes handle 6836 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6837 6838 # Oracle allows both formats 6839 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6840 # and Snowflake chose to do the same for familiarity 6841 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6842 if isinstance(this, exp.AggFunc): 6843 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6844 6845 if ignore_respect and ignore_respect is not this: 6846 ignore_respect.replace(ignore_respect.this) 6847 this = self.expression(ignore_respect.__class__, this=this) 6848 6849 this = self._parse_respect_or_ignore_nulls(this) 6850 6851 # bigquery select from window x AS (partition by ...) 
6852 if alias: 6853 over = None 6854 self._match(TokenType.ALIAS) 6855 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6856 return this 6857 else: 6858 over = self._prev.text.upper() 6859 6860 if comments and isinstance(func, exp.Expression): 6861 func.pop_comments() 6862 6863 if not self._match(TokenType.L_PAREN): 6864 return self.expression( 6865 exp.Window, 6866 comments=comments, 6867 this=this, 6868 alias=self._parse_id_var(False), 6869 over=over, 6870 ) 6871 6872 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6873 6874 first = self._match(TokenType.FIRST) 6875 if self._match_text_seq("LAST"): 6876 first = False 6877 6878 partition, order = self._parse_partition_and_order() 6879 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6880 6881 if kind: 6882 self._match(TokenType.BETWEEN) 6883 start = self._parse_window_spec() 6884 self._match(TokenType.AND) 6885 end = self._parse_window_spec() 6886 6887 spec = self.expression( 6888 exp.WindowSpec, 6889 kind=kind, 6890 start=start["value"], 6891 start_side=start["side"], 6892 end=end["value"], 6893 end_side=end["side"], 6894 ) 6895 else: 6896 spec = None 6897 6898 self._match_r_paren() 6899 6900 window = self.expression( 6901 exp.Window, 6902 comments=comments, 6903 this=this, 6904 partition_by=partition, 6905 order=order, 6906 spec=spec, 6907 alias=window_alias, 6908 over=over, 6909 first=first, 6910 ) 6911 6912 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
6913 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6914 return self._parse_window(window, alias=alias) 6915 6916 return window 6917 6918 def _parse_partition_and_order( 6919 self, 6920 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6921 return self._parse_partition_by(), self._parse_order() 6922 6923 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6924 self._match(TokenType.BETWEEN) 6925 6926 return { 6927 "value": ( 6928 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6929 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6930 or self._parse_bitwise() 6931 ), 6932 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6933 } 6934 6935 def _parse_alias( 6936 self, this: t.Optional[exp.Expression], explicit: bool = False 6937 ) -> t.Optional[exp.Expression]: 6938 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 6939 # so this section tries to parse the clause version and if it fails, it treats the token 6940 # as an identifier (alias) 6941 if self._can_parse_limit_or_offset(): 6942 return this 6943 6944 any_token = self._match(TokenType.ALIAS) 6945 comments = self._prev_comments or [] 6946 6947 if explicit and not any_token: 6948 return this 6949 6950 if self._match(TokenType.L_PAREN): 6951 aliases = self.expression( 6952 exp.Aliases, 6953 comments=comments, 6954 this=this, 6955 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6956 ) 6957 self._match_r_paren(aliases) 6958 return aliases 6959 6960 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6961 self.STRING_ALIASES and self._parse_string_as_identifier() 6962 ) 6963 6964 if alias: 6965 comments.extend(alias.pop_comments()) 6966 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6967 column = this.this 6968 6969 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6970 if not this.comments and 
column and column.comments: 6971 this.comments = column.pop_comments() 6972 6973 return this 6974 6975 def _parse_id_var( 6976 self, 6977 any_token: bool = True, 6978 tokens: t.Optional[t.Collection[TokenType]] = None, 6979 ) -> t.Optional[exp.Expression]: 6980 expression = self._parse_identifier() 6981 if not expression and ( 6982 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6983 ): 6984 quoted = self._prev.token_type == TokenType.STRING 6985 expression = self._identifier_expression(quoted=quoted) 6986 6987 return expression 6988 6989 def _parse_string(self) -> t.Optional[exp.Expression]: 6990 if self._match_set(self.STRING_PARSERS): 6991 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6992 return self._parse_placeholder() 6993 6994 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6995 output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6996 if output: 6997 output.update_positions(self._prev) 6998 return output 6999 7000 def _parse_number(self) -> t.Optional[exp.Expression]: 7001 if self._match_set(self.NUMERIC_PARSERS): 7002 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 7003 return self._parse_placeholder() 7004 7005 def _parse_identifier(self) -> t.Optional[exp.Expression]: 7006 if self._match(TokenType.IDENTIFIER): 7007 return self._identifier_expression(quoted=True) 7008 return self._parse_placeholder() 7009 7010 def _parse_var( 7011 self, 7012 any_token: bool = False, 7013 tokens: t.Optional[t.Collection[TokenType]] = None, 7014 upper: bool = False, 7015 ) -> t.Optional[exp.Expression]: 7016 if ( 7017 (any_token and self._advance_any()) 7018 or self._match(TokenType.VAR) 7019 or (self._match_set(tokens) if tokens else False) 7020 ): 7021 return self.expression( 7022 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7023 ) 7024 return self._parse_placeholder() 7025 7026 def _advance_any(self, 
ignore_reserved: bool = False) -> t.Optional[Token]: 7027 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7028 self._advance() 7029 return self._prev 7030 return None 7031 7032 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7033 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7034 7035 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7036 return self._parse_primary() or self._parse_var(any_token=True) 7037 7038 def _parse_null(self) -> t.Optional[exp.Expression]: 7039 if self._match_set(self.NULL_TOKENS): 7040 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7041 return self._parse_placeholder() 7042 7043 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7044 if self._match(TokenType.TRUE): 7045 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7046 if self._match(TokenType.FALSE): 7047 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7048 return self._parse_placeholder() 7049 7050 def _parse_star(self) -> t.Optional[exp.Expression]: 7051 if self._match(TokenType.STAR): 7052 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7053 return self._parse_placeholder() 7054 7055 def _parse_parameter(self) -> exp.Parameter: 7056 this = self._parse_identifier() or self._parse_primary_or_var() 7057 return self.expression(exp.Parameter, this=this) 7058 7059 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7060 if self._match_set(self.PLACEHOLDER_PARSERS): 7061 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7062 if placeholder: 7063 return placeholder 7064 self._advance(-1) 7065 return None 7066 7067 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7068 if not self._match_texts(keywords): 7069 return None 7070 if self._match(TokenType.L_PAREN, advance=False): 7071 return self._parse_wrapped_csv(self._parse_expression) 7072 7073 expression = 
self._parse_expression() 7074 return [expression] if expression else None 7075 7076 def _parse_csv( 7077 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7078 ) -> t.List[exp.Expression]: 7079 parse_result = parse_method() 7080 items = [parse_result] if parse_result is not None else [] 7081 7082 while self._match(sep): 7083 self._add_comments(parse_result) 7084 parse_result = parse_method() 7085 if parse_result is not None: 7086 items.append(parse_result) 7087 7088 return items 7089 7090 def _parse_tokens( 7091 self, parse_method: t.Callable, expressions: t.Dict 7092 ) -> t.Optional[exp.Expression]: 7093 this = parse_method() 7094 7095 while self._match_set(expressions): 7096 this = self.expression( 7097 expressions[self._prev.token_type], 7098 this=this, 7099 comments=self._prev_comments, 7100 expression=parse_method(), 7101 ) 7102 7103 return this 7104 7105 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7106 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7107 7108 def _parse_wrapped_csv( 7109 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 7110 ) -> t.List[exp.Expression]: 7111 return self._parse_wrapped( 7112 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7113 ) 7114 7115 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7116 wrapped = self._match(TokenType.L_PAREN) 7117 if not wrapped and not optional: 7118 self.raise_error("Expecting (") 7119 parse_result = parse_method() 7120 if wrapped: 7121 self._match_r_paren() 7122 return parse_result 7123 7124 def _parse_expressions(self) -> t.List[exp.Expression]: 7125 return self._parse_csv(self._parse_expression) 7126 7127 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7128 return self._parse_select() or self._parse_set_operations( 7129 self._parse_alias(self._parse_assignment(), explicit=True) 7130 if alias 
7131 else self._parse_assignment() 7132 ) 7133 7134 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7135 return self._parse_query_modifiers( 7136 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7137 ) 7138 7139 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7140 this = None 7141 if self._match_texts(self.TRANSACTION_KIND): 7142 this = self._prev.text 7143 7144 self._match_texts(("TRANSACTION", "WORK")) 7145 7146 modes = [] 7147 while True: 7148 mode = [] 7149 while self._match(TokenType.VAR): 7150 mode.append(self._prev.text) 7151 7152 if mode: 7153 modes.append(" ".join(mode)) 7154 if not self._match(TokenType.COMMA): 7155 break 7156 7157 return self.expression(exp.Transaction, this=this, modes=modes) 7158 7159 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7160 chain = None 7161 savepoint = None 7162 is_rollback = self._prev.token_type == TokenType.ROLLBACK 7163 7164 self._match_texts(("TRANSACTION", "WORK")) 7165 7166 if self._match_text_seq("TO"): 7167 self._match_text_seq("SAVEPOINT") 7168 savepoint = self._parse_id_var() 7169 7170 if self._match(TokenType.AND): 7171 chain = not self._match_text_seq("NO") 7172 self._match_text_seq("CHAIN") 7173 7174 if is_rollback: 7175 return self.expression(exp.Rollback, savepoint=savepoint) 7176 7177 return self.expression(exp.Commit, chain=chain) 7178 7179 def _parse_refresh(self) -> exp.Refresh: 7180 self._match(TokenType.TABLE) 7181 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7182 7183 def _parse_add_column(self) -> t.Optional[exp.Expression]: 7184 if not self._match_text_seq("ADD"): 7185 return None 7186 7187 self._match(TokenType.COLUMN) 7188 exists_column = self._parse_exists(not_=True) 7189 expression = self._parse_field_def() 7190 7191 if expression: 7192 expression.set("exists", exists_column) 7193 7194 # 
https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7195 if self._match_texts(("FIRST", "AFTER")): 7196 position = self._prev.text 7197 column_position = self.expression( 7198 exp.ColumnPosition, this=self._parse_column(), position=position 7199 ) 7200 expression.set("position", column_position) 7201 7202 return expression 7203 7204 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7205 drop = self._match(TokenType.DROP) and self._parse_drop() 7206 if drop and not isinstance(drop, exp.Command): 7207 drop.set("kind", drop.args.get("kind", "COLUMN")) 7208 return drop 7209 7210 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7211 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7212 return self.expression( 7213 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7214 ) 7215 7216 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7217 index = self._index - 1 7218 7219 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7220 return self._parse_csv( 7221 lambda: self.expression( 7222 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7223 ) 7224 ) 7225 7226 self._retreat(index) 7227 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 7228 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 7229 7230 if self._match_text_seq("ADD", "COLUMNS"): 7231 schema = self._parse_schema() 7232 if schema: 7233 return [schema] 7234 return [] 7235 7236 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 7237 7238 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7239 if self._match_texts(self.ALTER_ALTER_PARSERS): 7240 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7241 7242 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7243 # keyword after ALTER we default to parsing 
this statement 7244 self._match(TokenType.COLUMN) 7245 column = self._parse_field(any_token=True) 7246 7247 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7248 return self.expression(exp.AlterColumn, this=column, drop=True) 7249 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7250 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7251 if self._match(TokenType.COMMENT): 7252 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7253 if self._match_text_seq("DROP", "NOT", "NULL"): 7254 return self.expression( 7255 exp.AlterColumn, 7256 this=column, 7257 drop=True, 7258 allow_null=True, 7259 ) 7260 if self._match_text_seq("SET", "NOT", "NULL"): 7261 return self.expression( 7262 exp.AlterColumn, 7263 this=column, 7264 allow_null=False, 7265 ) 7266 7267 if self._match_text_seq("SET", "VISIBLE"): 7268 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7269 if self._match_text_seq("SET", "INVISIBLE"): 7270 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7271 7272 self._match_text_seq("SET", "DATA") 7273 self._match_text_seq("TYPE") 7274 return self.expression( 7275 exp.AlterColumn, 7276 this=column, 7277 dtype=self._parse_types(), 7278 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7279 using=self._match(TokenType.USING) and self._parse_assignment(), 7280 ) 7281 7282 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7283 if self._match_texts(("ALL", "EVEN", "AUTO")): 7284 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7285 7286 self._match_text_seq("KEY", "DISTKEY") 7287 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7288 7289 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7290 if compound: 7291 self._match_text_seq("SORTKEY") 7292 7293 if self._match(TokenType.L_PAREN, advance=False): 7294 return self.expression( 7295 
exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7296 ) 7297 7298 self._match_texts(("AUTO", "NONE")) 7299 return self.expression( 7300 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7301 ) 7302 7303 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7304 index = self._index - 1 7305 7306 partition_exists = self._parse_exists() 7307 if self._match(TokenType.PARTITION, advance=False): 7308 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7309 7310 self._retreat(index) 7311 return self._parse_csv(self._parse_drop_column) 7312 7313 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7314 if self._match(TokenType.COLUMN): 7315 exists = self._parse_exists() 7316 old_column = self._parse_column() 7317 to = self._match_text_seq("TO") 7318 new_column = self._parse_column() 7319 7320 if old_column is None or to is None or new_column is None: 7321 return None 7322 7323 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7324 7325 self._match_text_seq("TO") 7326 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7327 7328 def _parse_alter_table_set(self) -> exp.AlterSet: 7329 alter_set = self.expression(exp.AlterSet) 7330 7331 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7332 "TABLE", "PROPERTIES" 7333 ): 7334 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7335 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7336 alter_set.set("expressions", [self._parse_assignment()]) 7337 elif self._match_texts(("LOGGED", "UNLOGGED")): 7338 alter_set.set("option", exp.var(self._prev.text.upper())) 7339 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7340 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7341 elif self._match_text_seq("LOCATION"): 7342 
alter_set.set("location", self._parse_field()) 7343 elif self._match_text_seq("ACCESS", "METHOD"): 7344 alter_set.set("access_method", self._parse_field()) 7345 elif self._match_text_seq("TABLESPACE"): 7346 alter_set.set("tablespace", self._parse_field()) 7347 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7348 alter_set.set("file_format", [self._parse_field()]) 7349 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7350 alter_set.set("file_format", self._parse_wrapped_options()) 7351 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7352 alter_set.set("copy_options", self._parse_wrapped_options()) 7353 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7354 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7355 else: 7356 if self._match_text_seq("SERDE"): 7357 alter_set.set("serde", self._parse_field()) 7358 7359 alter_set.set("expressions", [self._parse_properties()]) 7360 7361 return alter_set 7362 7363 def _parse_alter(self) -> exp.Alter | exp.Command: 7364 start = self._prev 7365 7366 alter_token = self._match_set(self.ALTERABLES) and self._prev 7367 if not alter_token: 7368 return self._parse_as_command(start) 7369 7370 exists = self._parse_exists() 7371 only = self._match_text_seq("ONLY") 7372 this = self._parse_table(schema=True) 7373 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7374 7375 if self._next: 7376 self._advance() 7377 7378 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7379 if parser: 7380 actions = ensure_list(parser(self)) 7381 not_valid = self._match_text_seq("NOT", "VALID") 7382 options = self._parse_csv(self._parse_property) 7383 7384 if not self._curr and actions: 7385 return self.expression( 7386 exp.Alter, 7387 this=this, 7388 kind=alter_token.text.upper(), 7389 exists=exists, 7390 actions=actions, 7391 only=only, 7392 options=options, 7393 cluster=cluster, 7394 not_valid=not_valid, 7395 ) 7396 7397 return 
self._parse_as_command(start) 7398 7399 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7400 start = self._prev 7401 # https://duckdb.org/docs/sql/statements/analyze 7402 if not self._curr: 7403 return self.expression(exp.Analyze) 7404 7405 options = [] 7406 while self._match_texts(self.ANALYZE_STYLES): 7407 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7408 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7409 else: 7410 options.append(self._prev.text.upper()) 7411 7412 this: t.Optional[exp.Expression] = None 7413 inner_expression: t.Optional[exp.Expression] = None 7414 7415 kind = self._curr and self._curr.text.upper() 7416 7417 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7418 this = self._parse_table_parts() 7419 elif self._match_text_seq("TABLES"): 7420 if self._match_set((TokenType.FROM, TokenType.IN)): 7421 kind = f"{kind} {self._prev.text.upper()}" 7422 this = self._parse_table(schema=True, is_db_reference=True) 7423 elif self._match_text_seq("DATABASE"): 7424 this = self._parse_table(schema=True, is_db_reference=True) 7425 elif self._match_text_seq("CLUSTER"): 7426 this = self._parse_table() 7427 # Try matching inner expr keywords before fallback to parse table. 
7428 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7429 kind = None 7430 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7431 else: 7432 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7433 kind = None 7434 this = self._parse_table_parts() 7435 7436 partition = self._try_parse(self._parse_partition) 7437 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7438 return self._parse_as_command(start) 7439 7440 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7441 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7442 "WITH", "ASYNC", "MODE" 7443 ): 7444 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 7445 else: 7446 mode = None 7447 7448 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7449 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7450 7451 properties = self._parse_properties() 7452 return self.expression( 7453 exp.Analyze, 7454 kind=kind, 7455 this=this, 7456 mode=mode, 7457 partition=partition, 7458 properties=properties, 7459 expression=inner_expression, 7460 options=options, 7461 ) 7462 7463 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7464 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7465 this = None 7466 kind = self._prev.text.upper() 7467 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7468 expressions = [] 7469 7470 if not self._match_text_seq("STATISTICS"): 7471 self.raise_error("Expecting token STATISTICS") 7472 7473 if self._match_text_seq("NOSCAN"): 7474 this = "NOSCAN" 7475 elif self._match(TokenType.FOR): 7476 if self._match_text_seq("ALL", "COLUMNS"): 7477 this = "FOR ALL COLUMNS" 7478 if self._match_texts("COLUMNS"): 7479 this = "FOR COLUMNS" 7480 expressions = self._parse_csv(self._parse_column_reference) 7481 elif self._match_text_seq("SAMPLE"): 7482 sample = self._parse_number() 
7483 expressions = [ 7484 self.expression( 7485 exp.AnalyzeSample, 7486 sample=sample, 7487 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7488 ) 7489 ] 7490 7491 return self.expression( 7492 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7493 ) 7494 7495 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7496 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7497 kind = None 7498 this = None 7499 expression: t.Optional[exp.Expression] = None 7500 if self._match_text_seq("REF", "UPDATE"): 7501 kind = "REF" 7502 this = "UPDATE" 7503 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7504 this = "UPDATE SET DANGLING TO NULL" 7505 elif self._match_text_seq("STRUCTURE"): 7506 kind = "STRUCTURE" 7507 if self._match_text_seq("CASCADE", "FAST"): 7508 this = "CASCADE FAST" 7509 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7510 ("ONLINE", "OFFLINE") 7511 ): 7512 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7513 expression = self._parse_into() 7514 7515 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7516 7517 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7518 this = self._prev.text.upper() 7519 if self._match_text_seq("COLUMNS"): 7520 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7521 return None 7522 7523 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7524 kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7525 if self._match_text_seq("STATISTICS"): 7526 return self.expression(exp.AnalyzeDelete, kind=kind) 7527 return None 7528 7529 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7530 if self._match_text_seq("CHAINED", "ROWS"): 7531 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7532 return None 7533 7534 # 
https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7535 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7536 this = self._prev.text.upper() 7537 expression: t.Optional[exp.Expression] = None 7538 expressions = [] 7539 update_options = None 7540 7541 if self._match_text_seq("HISTOGRAM", "ON"): 7542 expressions = self._parse_csv(self._parse_column_reference) 7543 with_expressions = [] 7544 while self._match(TokenType.WITH): 7545 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7546 if self._match_texts(("SYNC", "ASYNC")): 7547 if self._match_text_seq("MODE", advance=False): 7548 with_expressions.append(f"{self._prev.text.upper()} MODE") 7549 self._advance() 7550 else: 7551 buckets = self._parse_number() 7552 if self._match_text_seq("BUCKETS"): 7553 with_expressions.append(f"{buckets} BUCKETS") 7554 if with_expressions: 7555 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7556 7557 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7558 TokenType.UPDATE, advance=False 7559 ): 7560 update_options = self._prev.text.upper() 7561 self._advance() 7562 elif self._match_text_seq("USING", "DATA"): 7563 expression = self.expression(exp.UsingData, this=self._parse_string()) 7564 7565 return self.expression( 7566 exp.AnalyzeHistogram, 7567 this=this, 7568 expressions=expressions, 7569 expression=expression, 7570 update_options=update_options, 7571 ) 7572 7573 def _parse_merge(self) -> exp.Merge: 7574 self._match(TokenType.INTO) 7575 target = self._parse_table() 7576 7577 if target and self._match(TokenType.ALIAS, advance=False): 7578 target.set("alias", self._parse_table_alias()) 7579 7580 self._match(TokenType.USING) 7581 using = self._parse_table() 7582 7583 self._match(TokenType.ON) 7584 on = self._parse_assignment() 7585 7586 return self.expression( 7587 exp.Merge, 7588 this=target, 7589 using=using, 7590 on=on, 7591 whens=self._parse_when_matched(), 7592 
returning=self._parse_returning(), 7593 ) 7594 7595 def _parse_when_matched(self) -> exp.Whens: 7596 whens = [] 7597 7598 while self._match(TokenType.WHEN): 7599 matched = not self._match(TokenType.NOT) 7600 self._match_text_seq("MATCHED") 7601 source = ( 7602 False 7603 if self._match_text_seq("BY", "TARGET") 7604 else self._match_text_seq("BY", "SOURCE") 7605 ) 7606 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7607 7608 self._match(TokenType.THEN) 7609 7610 if self._match(TokenType.INSERT): 7611 this = self._parse_star() 7612 if this: 7613 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7614 else: 7615 then = self.expression( 7616 exp.Insert, 7617 this=exp.var("ROW") 7618 if self._match_text_seq("ROW") 7619 else self._parse_value(values=False), 7620 expression=self._match_text_seq("VALUES") and self._parse_value(), 7621 ) 7622 elif self._match(TokenType.UPDATE): 7623 expressions = self._parse_star() 7624 if expressions: 7625 then = self.expression(exp.Update, expressions=expressions) 7626 else: 7627 then = self.expression( 7628 exp.Update, 7629 expressions=self._match(TokenType.SET) 7630 and self._parse_csv(self._parse_equality), 7631 ) 7632 elif self._match(TokenType.DELETE): 7633 then = self.expression(exp.Var, this=self._prev.text) 7634 else: 7635 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7636 7637 whens.append( 7638 self.expression( 7639 exp.When, 7640 matched=matched, 7641 source=source, 7642 condition=condition, 7643 then=then, 7644 ) 7645 ) 7646 return self.expression(exp.Whens, expressions=whens) 7647 7648 def _parse_show(self) -> t.Optional[exp.Expression]: 7649 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7650 if parser: 7651 return parser(self) 7652 return self._parse_as_command(self._prev) 7653 7654 def _parse_set_item_assignment( 7655 self, kind: t.Optional[str] = None 7656 ) -> t.Optional[exp.Expression]: 7657 index = self._index 7658 7659 if kind in 
("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7660 return self._parse_set_transaction(global_=kind == "GLOBAL") 7661 7662 left = self._parse_primary() or self._parse_column() 7663 assignment_delimiter = self._match_texts(("=", "TO")) 7664 7665 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7666 self._retreat(index) 7667 return None 7668 7669 right = self._parse_statement() or self._parse_id_var() 7670 if isinstance(right, (exp.Column, exp.Identifier)): 7671 right = exp.var(right.name) 7672 7673 this = self.expression(exp.EQ, this=left, expression=right) 7674 return self.expression(exp.SetItem, this=this, kind=kind) 7675 7676 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7677 self._match_text_seq("TRANSACTION") 7678 characteristics = self._parse_csv( 7679 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7680 ) 7681 return self.expression( 7682 exp.SetItem, 7683 expressions=characteristics, 7684 kind="TRANSACTION", 7685 **{"global": global_}, # type: ignore 7686 ) 7687 7688 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7689 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7690 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7691 7692 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7693 index = self._index 7694 set_ = self.expression( 7695 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7696 ) 7697 7698 if self._curr: 7699 self._retreat(index) 7700 return self._parse_as_command(self._prev) 7701 7702 return set_ 7703 7704 def _parse_var_from_options( 7705 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7706 ) -> t.Optional[exp.Var]: 7707 start = self._curr 7708 if not start: 7709 return None 7710 7711 option = start.text.upper() 7712 continuations = options.get(option) 7713 7714 index = self._index 7715 self._advance() 7716 for 
keywords in continuations or []: 7717 if isinstance(keywords, str): 7718 keywords = (keywords,) 7719 7720 if self._match_text_seq(*keywords): 7721 option = f"{option} {' '.join(keywords)}" 7722 break 7723 else: 7724 if continuations or continuations is None: 7725 if raise_unmatched: 7726 self.raise_error(f"Unknown option {option}") 7727 7728 self._retreat(index) 7729 return None 7730 7731 return exp.var(option) 7732 7733 def _parse_as_command(self, start: Token) -> exp.Command: 7734 while self._curr: 7735 self._advance() 7736 text = self._find_sql(start, self._prev) 7737 size = len(start.text) 7738 self._warn_unsupported() 7739 return exp.Command(this=text[:size], expression=text[size:]) 7740 7741 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7742 settings = [] 7743 7744 self._match_l_paren() 7745 kind = self._parse_id_var() 7746 7747 if self._match(TokenType.L_PAREN): 7748 while True: 7749 key = self._parse_id_var() 7750 value = self._parse_primary() 7751 if not key and value is None: 7752 break 7753 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7754 self._match(TokenType.R_PAREN) 7755 7756 self._match_r_paren() 7757 7758 return self.expression( 7759 exp.DictProperty, 7760 this=this, 7761 kind=kind.this if kind else None, 7762 settings=settings, 7763 ) 7764 7765 def _parse_dict_range(self, this: str) -> exp.DictRange: 7766 self._match_l_paren() 7767 has_min = self._match_text_seq("MIN") 7768 if has_min: 7769 min = self._parse_var() or self._parse_primary() 7770 self._match_text_seq("MAX") 7771 max = self._parse_var() or self._parse_primary() 7772 else: 7773 max = self._parse_var() or self._parse_primary() 7774 min = exp.Literal.number(0) 7775 self._match_r_paren() 7776 return self.expression(exp.DictRange, this=this, min=min, max=max) 7777 7778 def _parse_comprehension( 7779 self, this: t.Optional[exp.Expression] 7780 ) -> t.Optional[exp.Comprehension]: 7781 index = self._index 7782 expression = self._parse_column() 
7783 if not self._match(TokenType.IN): 7784 self._retreat(index - 1) 7785 return None 7786 iterator = self._parse_column() 7787 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7788 return self.expression( 7789 exp.Comprehension, 7790 this=this, 7791 expression=expression, 7792 iterator=iterator, 7793 condition=condition, 7794 ) 7795 7796 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7797 if self._match(TokenType.HEREDOC_STRING): 7798 return self.expression(exp.Heredoc, this=self._prev.text) 7799 7800 if not self._match_text_seq("$"): 7801 return None 7802 7803 tags = ["$"] 7804 tag_text = None 7805 7806 if self._is_connected(): 7807 self._advance() 7808 tags.append(self._prev.text.upper()) 7809 else: 7810 self.raise_error("No closing $ found") 7811 7812 if tags[-1] != "$": 7813 if self._is_connected() and self._match_text_seq("$"): 7814 tag_text = tags[-1] 7815 tags.append("$") 7816 else: 7817 self.raise_error("No closing $ found") 7818 7819 heredoc_start = self._curr 7820 7821 while self._curr: 7822 if self._match_text_seq(*tags, advance=False): 7823 this = self._find_sql(heredoc_start, self._prev) 7824 self._advance(len(tags)) 7825 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7826 7827 self._advance() 7828 7829 self.raise_error(f"No closing {''.join(tags)} found") 7830 return None 7831 7832 def _find_parser( 7833 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7834 ) -> t.Optional[t.Callable]: 7835 if not self._curr: 7836 return None 7837 7838 index = self._index 7839 this = [] 7840 while True: 7841 # The current token might be multiple words 7842 curr = self._curr.text.upper() 7843 key = curr.split(" ") 7844 this.append(curr) 7845 7846 self._advance() 7847 result, trie = in_trie(trie, key) 7848 if result == TrieResult.FAILED: 7849 break 7850 7851 if result == TrieResult.EXISTS: 7852 subparser = parsers[" ".join(this)] 7853 return subparser 7854 7855 self._retreat(index) 7856 return None 7857 7858 def 
_match(self, token_type, advance=True, expression=None): 7859 if not self._curr: 7860 return None 7861 7862 if self._curr.token_type == token_type: 7863 if advance: 7864 self._advance() 7865 self._add_comments(expression) 7866 return True 7867 7868 return None 7869 7870 def _match_set(self, types, advance=True): 7871 if not self._curr: 7872 return None 7873 7874 if self._curr.token_type in types: 7875 if advance: 7876 self._advance() 7877 return True 7878 7879 return None 7880 7881 def _match_pair(self, token_type_a, token_type_b, advance=True): 7882 if not self._curr or not self._next: 7883 return None 7884 7885 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7886 if advance: 7887 self._advance(2) 7888 return True 7889 7890 return None 7891 7892 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7893 if not self._match(TokenType.L_PAREN, expression=expression): 7894 self.raise_error("Expecting (") 7895 7896 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7897 if not self._match(TokenType.R_PAREN, expression=expression): 7898 self.raise_error("Expecting )") 7899 7900 def _match_texts(self, texts, advance=True): 7901 if ( 7902 self._curr 7903 and self._curr.token_type != TokenType.STRING 7904 and self._curr.text.upper() in texts 7905 ): 7906 if advance: 7907 self._advance() 7908 return True 7909 return None 7910 7911 def _match_text_seq(self, *texts, advance=True): 7912 index = self._index 7913 for text in texts: 7914 if ( 7915 self._curr 7916 and self._curr.token_type != TokenType.STRING 7917 and self._curr.text.upper() == text 7918 ): 7919 self._advance() 7920 else: 7921 self._retreat(index) 7922 return None 7923 7924 if not advance: 7925 self._retreat(index) 7926 7927 return True 7928 7929 def _replace_lambda( 7930 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7931 ) -> t.Optional[exp.Expression]: 7932 if not node: 7933 return node 7934 
7935 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7936 7937 for column in node.find_all(exp.Column): 7938 typ = lambda_types.get(column.parts[0].name) 7939 if typ is not None: 7940 dot_or_id = column.to_dot() if column.table else column.this 7941 7942 if typ: 7943 dot_or_id = self.expression( 7944 exp.Cast, 7945 this=dot_or_id, 7946 to=typ, 7947 ) 7948 7949 parent = column.parent 7950 7951 while isinstance(parent, exp.Dot): 7952 if not isinstance(parent.parent, exp.Dot): 7953 parent.replace(dot_or_id) 7954 break 7955 parent = parent.parent 7956 else: 7957 if column is node: 7958 node = dot_or_id 7959 else: 7960 column.replace(dot_or_id) 7961 return node 7962 7963 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7964 start = self._prev 7965 7966 # Not to be confused with TRUNCATE(number, decimals) function call 7967 if self._match(TokenType.L_PAREN): 7968 self._retreat(self._index - 2) 7969 return self._parse_function() 7970 7971 # Clickhouse supports TRUNCATE DATABASE as well 7972 is_database = self._match(TokenType.DATABASE) 7973 7974 self._match(TokenType.TABLE) 7975 7976 exists = self._parse_exists(not_=False) 7977 7978 expressions = self._parse_csv( 7979 lambda: self._parse_table(schema=True, is_db_reference=is_database) 7980 ) 7981 7982 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7983 7984 if self._match_text_seq("RESTART", "IDENTITY"): 7985 identity = "RESTART" 7986 elif self._match_text_seq("CONTINUE", "IDENTITY"): 7987 identity = "CONTINUE" 7988 else: 7989 identity = None 7990 7991 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7992 option = self._prev.text 7993 else: 7994 option = None 7995 7996 partition = self._parse_partition() 7997 7998 # Fallback case 7999 if self._curr: 8000 return self._parse_as_command(start) 8001 8002 return self.expression( 8003 exp.TruncateTable, 8004 expressions=expressions, 8005 is_database=is_database, 8006 
exists=exists, 8007 cluster=cluster, 8008 identity=identity, 8009 option=option, 8010 partition=partition, 8011 ) 8012 8013 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8014 this = self._parse_ordered(self._parse_opclass) 8015 8016 if not self._match(TokenType.WITH): 8017 return this 8018 8019 op = self._parse_var(any_token=True) 8020 8021 return self.expression(exp.WithOperator, this=this, op=op) 8022 8023 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8024 self._match(TokenType.EQ) 8025 self._match(TokenType.L_PAREN) 8026 8027 opts: t.List[t.Optional[exp.Expression]] = [] 8028 option: exp.Expression | None 8029 while self._curr and not self._match(TokenType.R_PAREN): 8030 if self._match_text_seq("FORMAT_NAME", "="): 8031 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8032 option = self._parse_format_name() 8033 else: 8034 option = self._parse_property() 8035 8036 if option is None: 8037 self.raise_error("Unable to parse option") 8038 break 8039 8040 opts.append(option) 8041 8042 return opts 8043 8044 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8045 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8046 8047 options = [] 8048 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8049 option = self._parse_var(any_token=True) 8050 prev = self._prev.text.upper() 8051 8052 # Different dialects might separate options and values by white space, "=" and "AS" 8053 self._match(TokenType.EQ) 8054 self._match(TokenType.ALIAS) 8055 8056 param = self.expression(exp.CopyParameter, this=option) 8057 8058 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8059 TokenType.L_PAREN, advance=False 8060 ): 8061 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8062 param.set("expressions", self._parse_wrapped_options()) 8063 elif prev == "FILE_FORMAT": 8064 # T-SQL's external file format case 8065 param.set("expression", self._parse_field()) 8066 
else: 8067 param.set("expression", self._parse_unquoted_field()) 8068 8069 options.append(param) 8070 self._match(sep) 8071 8072 return options 8073 8074 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8075 expr = self.expression(exp.Credentials) 8076 8077 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8078 expr.set("storage", self._parse_field()) 8079 if self._match_text_seq("CREDENTIALS"): 8080 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8081 creds = ( 8082 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8083 ) 8084 expr.set("credentials", creds) 8085 if self._match_text_seq("ENCRYPTION"): 8086 expr.set("encryption", self._parse_wrapped_options()) 8087 if self._match_text_seq("IAM_ROLE"): 8088 expr.set("iam_role", self._parse_field()) 8089 if self._match_text_seq("REGION"): 8090 expr.set("region", self._parse_field()) 8091 8092 return expr 8093 8094 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8095 return self._parse_field() 8096 8097 def _parse_copy(self) -> exp.Copy | exp.Command: 8098 start = self._prev 8099 8100 self._match(TokenType.INTO) 8101 8102 this = ( 8103 self._parse_select(nested=True, parse_subquery_alias=False) 8104 if self._match(TokenType.L_PAREN, advance=False) 8105 else self._parse_table(schema=True) 8106 ) 8107 8108 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8109 8110 files = self._parse_csv(self._parse_file_location) 8111 credentials = self._parse_credentials() 8112 8113 self._match_text_seq("WITH") 8114 8115 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8116 8117 # Fallback case 8118 if self._curr: 8119 return self._parse_as_command(start) 8120 8121 return self.expression( 8122 exp.Copy, 8123 this=this, 8124 kind=kind, 8125 credentials=credentials, 8126 files=files, 8127 params=params, 8128 ) 8129 8130 def _parse_normalize(self) -> exp.Normalize: 8131 return self.expression( 8132 
exp.Normalize, 8133 this=self._parse_bitwise(), 8134 form=self._match(TokenType.COMMA) and self._parse_var(), 8135 ) 8136 8137 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8138 args = self._parse_csv(lambda: self._parse_lambda()) 8139 8140 this = seq_get(args, 0) 8141 decimals = seq_get(args, 1) 8142 8143 return expr_type( 8144 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8145 ) 8146 8147 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8148 if self._match_text_seq("COLUMNS", "(", advance=False): 8149 this = self._parse_function() 8150 if isinstance(this, exp.Columns): 8151 this.set("unpack", True) 8152 return this 8153 8154 return self.expression( 8155 exp.Star, 8156 **{ # type: ignore 8157 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8158 "replace": self._parse_star_op("REPLACE"), 8159 "rename": self._parse_star_op("RENAME"), 8160 }, 8161 ) 8162 8163 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8164 privilege_parts = [] 8165 8166 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8167 # (end of privilege list) or L_PAREN (start of column list) are met 8168 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8169 privilege_parts.append(self._curr.text.upper()) 8170 self._advance() 8171 8172 this = exp.var(" ".join(privilege_parts)) 8173 expressions = ( 8174 self._parse_wrapped_csv(self._parse_column) 8175 if self._match(TokenType.L_PAREN, advance=False) 8176 else None 8177 ) 8178 8179 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8180 8181 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8182 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8183 principal = self._parse_id_var() 8184 8185 if not principal: 8186 return None 8187 8188 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8189 8190 def 
_parse_grant(self) -> exp.Grant | exp.Command: 8191 start = self._prev 8192 8193 privileges = self._parse_csv(self._parse_grant_privilege) 8194 8195 self._match(TokenType.ON) 8196 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8197 8198 # Attempt to parse the securable e.g. MySQL allows names 8199 # such as "foo.*", "*.*" which are not easily parseable yet 8200 securable = self._try_parse(self._parse_table_parts) 8201 8202 if not securable or not self._match_text_seq("TO"): 8203 return self._parse_as_command(start) 8204 8205 principals = self._parse_csv(self._parse_grant_principal) 8206 8207 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8208 8209 if self._curr: 8210 return self._parse_as_command(start) 8211 8212 return self.expression( 8213 exp.Grant, 8214 privileges=privileges, 8215 kind=kind, 8216 securable=securable, 8217 principals=principals, 8218 grant_option=grant_option, 8219 ) 8220 8221 def _parse_overlay(self) -> exp.Overlay: 8222 return self.expression( 8223 exp.Overlay, 8224 **{ # type: ignore 8225 "this": self._parse_bitwise(), 8226 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8227 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8228 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8229 }, 8230 ) 8231 8232 def _parse_format_name(self) -> exp.Property: 8233 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8234 # for FILE_FORMAT = <format_name> 8235 return self.expression( 8236 exp.Property, 8237 this=exp.var("FORMAT_NAME"), 8238 value=self._parse_string() or self._parse_table_parts(), 8239 ) 8240 8241 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8242 args: t.List[exp.Expression] = [] 8243 8244 if self._match(TokenType.DISTINCT): 8245 args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()])) 8246 self._match(TokenType.COMMA) 8247 8248 
args.extend(self._parse_csv(self._parse_assignment)) 8249 8250 return self.expression( 8251 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8252 ) 8253 8254 def _identifier_expression( 8255 self, token: t.Optional[Token] = None, **kwargs: t.Any 8256 ) -> exp.Identifier: 8257 token = token or self._prev 8258 expression = self.expression(exp.Identifier, this=token.text, **kwargs) 8259 expression.update_positions(token) 8260 return expression
def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a map node from a flat, alternating key/value argument list.

    Args:
        args: Either a single star expression, or expressions laid out as
            `[key1, value1, key2, value2, ...]`.

    Returns:
        An `exp.StarMap` wrapping the lone argument when it is a star, otherwise an
        `exp.VarMap` whose `keys` and `values` are built with `exp.array` from the
        even and odd positions of `args` respectively.

    Raises:
        IndexError: If a key has no matching value (odd number of arguments).
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    # Index in steps of two (instead of slicing) so that a dangling key still raises
    pairs = [(args[pos], args[pos + 1]) for pos in range(0, len(args), 2)]
    map_keys = [key for key, _ in pairs]
    map_values = [value for _, value in pairs]

    return exp.VarMap(
        keys=exp.array(*map_keys, copy=False),
        values=exp.array(*map_values, copy=False),
    )
def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    """Create a parser callback for a binary range operator.

    Args:
        expr_type: The expression class to build for the operator.
        reverse_args: If True, the newly parsed operand is stored under `this` and
            the operand passed in by the caller is stored under `expression`.

    Returns:
        A function that takes the parser and the already-parsed operand, parses the
        other operand with `_parse_bitwise`, builds an `expr_type` node and hands it
        to `_parse_escape`.
    """

    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        parsed = self._parse_bitwise()
        left, right = (parsed, this) if reverse_args else (this, parsed)
        node = self.expression(expr_type, this=left, expression=right)
        return self._parse_escape(node)

    return _parse_binary_range
def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    """Build a logarithm node from the arguments of a `LOG(...)` call.

    Args:
        args: The parsed call arguments (one or two are read).
        dialect: Consulted for `LOG_BASE_FIRST` and, through its `parser_class`,
            for `LOG_DEFAULTS_TO_LN`.

    Returns:
        For two arguments, an `exp.Log` with the operands swapped when the dialect
        does not put the base first. Otherwise an `exp.Ln` or `exp.Log` of the first
        argument, depending on `LOG_DEFAULTS_TO_LN`.
    """
    first = seq_get(args, 0)
    second = seq_get(args, 1)

    if not second:
        # Single-argument form: the dialect's parser decides between LN and LOG
        log_type = exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log
        return log_type(this=first)

    # Default argument order is base, expression
    if dialect.LOG_BASE_FIRST:
        return exp.Log(this=first, expression=second)

    return exp.Log(this=second, expression=first)
def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    """Create a builder for JSON extraction nodes whose second argument is a path.

    Args:
        expr_type: The expression class the returned builder instantiates.

    Returns:
        A function of `(args, dialect)` that builds `expr_type` from the first
        argument and the second argument converted with `dialect.to_json_path`.
    """

    def _builder(args: t.List, dialect: Dialect) -> E:
        target = seq_get(args, 0)
        json_path = dialect.to_json_path(seq_get(args, 1))
        node = expr_type(this=target, expression=json_path)

        # Only JSONExtract keeps the arguments that follow the path
        if len(args) > 2 and expr_type is exp.JSONExtract:
            node.set("expressions", args[2:])

        return node

    return _builder
def build_mod(args: t.List) -> exp.Mod:
    """Build an `exp.Mod` node from the arguments of a `MOD(a, b)` call.

    Args:
        args: The parsed call arguments; the first two are used as the operands.

    Returns:
        An `exp.Mod` whose operands are parenthesized when they are binary nodes.
    """

    def _parenthesize(operand: t.Any) -> t.Any:
        # Wrap binary operands, e.g. MOD(a + 1, 7) -> (a + 1) % 7
        if isinstance(operand, exp.Binary):
            return exp.Paren(this=operand)
        return operand

    dividend = _parenthesize(seq_get(args, 0))
    divisor = _parenthesize(seq_get(args, 1))

    return exp.Mod(this=dividend, expression=divisor)
def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    """Build an array-like node and record which constructor syntax produced it.

    Args:
        exp_class: The expression class to instantiate with `expressions=args`.
        args: The parsed elements.
        bracket_kind: The token type that opened the constructor.
        dialect: Consulted for `HAS_DISTINCT_ARRAY_CONSTRUCTORS`.

    Returns:
        The new node. For `exp.Array` in dialects with distinct array constructors,
        its `bracket_notation` arg is set to whether `bracket_kind` is `L_BRACKET`.
    """
    node = exp_class(expressions=args)

    tracks_brackets = exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS
    if not tracks_brackets:
        return node

    used_brackets = bracket_kind == TokenType.L_BRACKET
    node.set("bracket_notation", used_brackets)
    return node
def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    """Build an `exp.ConvertTimezone` node from the call arguments.

    Args:
        args: The parsed call arguments.
        default_source_tz: Source timezone to use, as a string literal, when exactly
            two arguments are given.

    Returns:
        For exactly two arguments, a node with `target_tz` and `timestamp` taken
        from them, and `source_tz` set from `default_source_tz` (None if it is
        falsy). Any other argument count is delegated to
        `exp.ConvertTimezone.from_arg_list`.
    """
    if len(args) != 2:
        return exp.ConvertTimezone.from_arg_list(args)

    source_tz = None
    if default_source_tz:
        source_tz = exp.Literal.string(default_source_tz)

    return exp.ConvertTimezone(
        source_tz=source_tz,
        target_tz=seq_get(args, 0),
        timestamp=seq_get(args, 1),
    )
176class Parser(metaclass=_Parser): 177 """ 178 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 179 180 Args: 181 error_level: The desired error level. 182 Default: ErrorLevel.IMMEDIATE 183 error_message_context: The amount of context to capture from a query string when displaying 184 the error message (in number of characters). 185 Default: 100 186 max_errors: Maximum number of error messages to include in a raised ParseError. 187 This is only relevant if error_level is ErrorLevel.RAISE. 188 Default: 3 189 """ 190 191 FUNCTIONS: t.Dict[str, t.Callable] = { 192 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 193 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 194 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 195 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 196 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 197 ), 198 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 199 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 200 ), 201 "CHAR": lambda args: exp.Chr(expressions=args), 202 "CHR": lambda args: exp.Chr(expressions=args), 203 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 204 "CONCAT": lambda args, dialect: exp.Concat( 205 expressions=args, 206 safe=not dialect.STRICT_STRING_CONCAT, 207 coalesce=dialect.CONCAT_COALESCE, 208 ), 209 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 210 expressions=args, 211 safe=not dialect.STRICT_STRING_CONCAT, 212 coalesce=dialect.CONCAT_COALESCE, 213 ), 214 "CONVERT_TIMEZONE": build_convert_timezone, 215 "DATE_TO_DATE_STR": lambda args: exp.Cast( 216 this=seq_get(args, 0), 217 to=exp.DataType(this=exp.DataType.Type.TEXT), 218 ), 219 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 220 start=seq_get(args, 0), 221 end=seq_get(args, 1), 222 step=seq_get(args, 2) or 
exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 223 ), 224 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 225 "HEX": build_hex, 226 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 227 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 228 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 229 "LIKE": build_like, 230 "LOG": build_logarithm, 231 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 232 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 233 "LOWER": build_lower, 234 "LPAD": lambda args: build_pad(args), 235 "LEFTPAD": lambda args: build_pad(args), 236 "LTRIM": lambda args: build_trim(args), 237 "MOD": build_mod, 238 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 239 "RPAD": lambda args: build_pad(args, is_left=False), 240 "RTRIM": lambda args: build_trim(args, is_left=False), 241 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 242 if len(args) != 2 243 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 244 "STRPOS": exp.StrPosition.from_arg_list, 245 "CHARINDEX": lambda args: build_locate_strposition(args), 246 "INSTR": exp.StrPosition.from_arg_list, 247 "LOCATE": lambda args: build_locate_strposition(args), 248 "TIME_TO_TIME_STR": lambda args: exp.Cast( 249 this=seq_get(args, 0), 250 to=exp.DataType(this=exp.DataType.Type.TEXT), 251 ), 252 "TO_HEX": build_hex, 253 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 254 this=exp.Cast( 255 this=seq_get(args, 0), 256 to=exp.DataType(this=exp.DataType.Type.TEXT), 257 ), 258 start=exp.Literal.number(1), 259 length=exp.Literal.number(10), 260 ), 261 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 262 "UPPER": build_upper, 263 "VAR_MAP": build_var_map, 264 } 265 266 NO_PAREN_FUNCTIONS = { 267 TokenType.CURRENT_DATE: 
exp.CurrentDate, 268 TokenType.CURRENT_DATETIME: exp.CurrentDate, 269 TokenType.CURRENT_TIME: exp.CurrentTime, 270 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 271 TokenType.CURRENT_USER: exp.CurrentUser, 272 } 273 274 STRUCT_TYPE_TOKENS = { 275 TokenType.NESTED, 276 TokenType.OBJECT, 277 TokenType.STRUCT, 278 TokenType.UNION, 279 } 280 281 NESTED_TYPE_TOKENS = { 282 TokenType.ARRAY, 283 TokenType.LIST, 284 TokenType.LOWCARDINALITY, 285 TokenType.MAP, 286 TokenType.NULLABLE, 287 TokenType.RANGE, 288 *STRUCT_TYPE_TOKENS, 289 } 290 291 ENUM_TYPE_TOKENS = { 292 TokenType.DYNAMIC, 293 TokenType.ENUM, 294 TokenType.ENUM8, 295 TokenType.ENUM16, 296 } 297 298 AGGREGATE_TYPE_TOKENS = { 299 TokenType.AGGREGATEFUNCTION, 300 TokenType.SIMPLEAGGREGATEFUNCTION, 301 } 302 303 TYPE_TOKENS = { 304 TokenType.BIT, 305 TokenType.BOOLEAN, 306 TokenType.TINYINT, 307 TokenType.UTINYINT, 308 TokenType.SMALLINT, 309 TokenType.USMALLINT, 310 TokenType.INT, 311 TokenType.UINT, 312 TokenType.BIGINT, 313 TokenType.UBIGINT, 314 TokenType.INT128, 315 TokenType.UINT128, 316 TokenType.INT256, 317 TokenType.UINT256, 318 TokenType.MEDIUMINT, 319 TokenType.UMEDIUMINT, 320 TokenType.FIXEDSTRING, 321 TokenType.FLOAT, 322 TokenType.DOUBLE, 323 TokenType.UDOUBLE, 324 TokenType.CHAR, 325 TokenType.NCHAR, 326 TokenType.VARCHAR, 327 TokenType.NVARCHAR, 328 TokenType.BPCHAR, 329 TokenType.TEXT, 330 TokenType.MEDIUMTEXT, 331 TokenType.LONGTEXT, 332 TokenType.BLOB, 333 TokenType.MEDIUMBLOB, 334 TokenType.LONGBLOB, 335 TokenType.BINARY, 336 TokenType.VARBINARY, 337 TokenType.JSON, 338 TokenType.JSONB, 339 TokenType.INTERVAL, 340 TokenType.TINYBLOB, 341 TokenType.TINYTEXT, 342 TokenType.TIME, 343 TokenType.TIMETZ, 344 TokenType.TIMESTAMP, 345 TokenType.TIMESTAMP_S, 346 TokenType.TIMESTAMP_MS, 347 TokenType.TIMESTAMP_NS, 348 TokenType.TIMESTAMPTZ, 349 TokenType.TIMESTAMPLTZ, 350 TokenType.TIMESTAMPNTZ, 351 TokenType.DATETIME, 352 TokenType.DATETIME2, 353 TokenType.DATETIME64, 354 TokenType.SMALLDATETIME, 
355 TokenType.DATE, 356 TokenType.DATE32, 357 TokenType.INT4RANGE, 358 TokenType.INT4MULTIRANGE, 359 TokenType.INT8RANGE, 360 TokenType.INT8MULTIRANGE, 361 TokenType.NUMRANGE, 362 TokenType.NUMMULTIRANGE, 363 TokenType.TSRANGE, 364 TokenType.TSMULTIRANGE, 365 TokenType.TSTZRANGE, 366 TokenType.TSTZMULTIRANGE, 367 TokenType.DATERANGE, 368 TokenType.DATEMULTIRANGE, 369 TokenType.DECIMAL, 370 TokenType.DECIMAL32, 371 TokenType.DECIMAL64, 372 TokenType.DECIMAL128, 373 TokenType.DECIMAL256, 374 TokenType.UDECIMAL, 375 TokenType.BIGDECIMAL, 376 TokenType.UUID, 377 TokenType.GEOGRAPHY, 378 TokenType.GEOMETRY, 379 TokenType.POINT, 380 TokenType.RING, 381 TokenType.LINESTRING, 382 TokenType.MULTILINESTRING, 383 TokenType.POLYGON, 384 TokenType.MULTIPOLYGON, 385 TokenType.HLLSKETCH, 386 TokenType.HSTORE, 387 TokenType.PSEUDO_TYPE, 388 TokenType.SUPER, 389 TokenType.SERIAL, 390 TokenType.SMALLSERIAL, 391 TokenType.BIGSERIAL, 392 TokenType.XML, 393 TokenType.YEAR, 394 TokenType.USERDEFINED, 395 TokenType.MONEY, 396 TokenType.SMALLMONEY, 397 TokenType.ROWVERSION, 398 TokenType.IMAGE, 399 TokenType.VARIANT, 400 TokenType.VECTOR, 401 TokenType.VOID, 402 TokenType.OBJECT, 403 TokenType.OBJECT_IDENTIFIER, 404 TokenType.INET, 405 TokenType.IPADDRESS, 406 TokenType.IPPREFIX, 407 TokenType.IPV4, 408 TokenType.IPV6, 409 TokenType.UNKNOWN, 410 TokenType.NOTHING, 411 TokenType.NULL, 412 TokenType.NAME, 413 TokenType.TDIGEST, 414 TokenType.DYNAMIC, 415 *ENUM_TYPE_TOKENS, 416 *NESTED_TYPE_TOKENS, 417 *AGGREGATE_TYPE_TOKENS, 418 } 419 420 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 421 TokenType.BIGINT: TokenType.UBIGINT, 422 TokenType.INT: TokenType.UINT, 423 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 424 TokenType.SMALLINT: TokenType.USMALLINT, 425 TokenType.TINYINT: TokenType.UTINYINT, 426 TokenType.DECIMAL: TokenType.UDECIMAL, 427 TokenType.DOUBLE: TokenType.UDOUBLE, 428 } 429 430 SUBQUERY_PREDICATES = { 431 TokenType.ANY: exp.Any, 432 TokenType.ALL: exp.All, 433 TokenType.EXISTS: exp.Exists, 434 
TokenType.SOME: exp.Any, 435 } 436 437 RESERVED_TOKENS = { 438 *Tokenizer.SINGLE_TOKENS.values(), 439 TokenType.SELECT, 440 } - {TokenType.IDENTIFIER} 441 442 DB_CREATABLES = { 443 TokenType.DATABASE, 444 TokenType.DICTIONARY, 445 TokenType.FILE_FORMAT, 446 TokenType.MODEL, 447 TokenType.NAMESPACE, 448 TokenType.SCHEMA, 449 TokenType.SEQUENCE, 450 TokenType.SINK, 451 TokenType.SOURCE, 452 TokenType.STAGE, 453 TokenType.STORAGE_INTEGRATION, 454 TokenType.STREAMLIT, 455 TokenType.TABLE, 456 TokenType.TAG, 457 TokenType.VIEW, 458 TokenType.WAREHOUSE, 459 } 460 461 CREATABLES = { 462 TokenType.COLUMN, 463 TokenType.CONSTRAINT, 464 TokenType.FOREIGN_KEY, 465 TokenType.FUNCTION, 466 TokenType.INDEX, 467 TokenType.PROCEDURE, 468 *DB_CREATABLES, 469 } 470 471 ALTERABLES = { 472 TokenType.INDEX, 473 TokenType.TABLE, 474 TokenType.VIEW, 475 } 476 477 # Tokens that can represent identifiers 478 ID_VAR_TOKENS = { 479 TokenType.ALL, 480 TokenType.ATTACH, 481 TokenType.VAR, 482 TokenType.ANTI, 483 TokenType.APPLY, 484 TokenType.ASC, 485 TokenType.ASOF, 486 TokenType.AUTO_INCREMENT, 487 TokenType.BEGIN, 488 TokenType.BPCHAR, 489 TokenType.CACHE, 490 TokenType.CASE, 491 TokenType.COLLATE, 492 TokenType.COMMAND, 493 TokenType.COMMENT, 494 TokenType.COMMIT, 495 TokenType.CONSTRAINT, 496 TokenType.COPY, 497 TokenType.CUBE, 498 TokenType.CURRENT_SCHEMA, 499 TokenType.DEFAULT, 500 TokenType.DELETE, 501 TokenType.DESC, 502 TokenType.DESCRIBE, 503 TokenType.DETACH, 504 TokenType.DICTIONARY, 505 TokenType.DIV, 506 TokenType.END, 507 TokenType.EXECUTE, 508 TokenType.EXPORT, 509 TokenType.ESCAPE, 510 TokenType.FALSE, 511 TokenType.FIRST, 512 TokenType.FILTER, 513 TokenType.FINAL, 514 TokenType.FORMAT, 515 TokenType.FULL, 516 TokenType.GET, 517 TokenType.IDENTIFIER, 518 TokenType.IS, 519 TokenType.ISNULL, 520 TokenType.INTERVAL, 521 TokenType.KEEP, 522 TokenType.KILL, 523 TokenType.LEFT, 524 TokenType.LIMIT, 525 TokenType.LOAD, 526 TokenType.MERGE, 527 TokenType.NATURAL, 528 TokenType.NEXT, 
529 TokenType.OFFSET, 530 TokenType.OPERATOR, 531 TokenType.ORDINALITY, 532 TokenType.OVERLAPS, 533 TokenType.OVERWRITE, 534 TokenType.PARTITION, 535 TokenType.PERCENT, 536 TokenType.PIVOT, 537 TokenType.PRAGMA, 538 TokenType.PUT, 539 TokenType.RANGE, 540 TokenType.RECURSIVE, 541 TokenType.REFERENCES, 542 TokenType.REFRESH, 543 TokenType.RENAME, 544 TokenType.REPLACE, 545 TokenType.RIGHT, 546 TokenType.ROLLUP, 547 TokenType.ROW, 548 TokenType.ROWS, 549 TokenType.SEMI, 550 TokenType.SET, 551 TokenType.SETTINGS, 552 TokenType.SHOW, 553 TokenType.TEMPORARY, 554 TokenType.TOP, 555 TokenType.TRUE, 556 TokenType.TRUNCATE, 557 TokenType.UNIQUE, 558 TokenType.UNNEST, 559 TokenType.UNPIVOT, 560 TokenType.UPDATE, 561 TokenType.USE, 562 TokenType.VOLATILE, 563 TokenType.WINDOW, 564 *CREATABLES, 565 *SUBQUERY_PREDICATES, 566 *TYPE_TOKENS, 567 *NO_PAREN_FUNCTIONS, 568 } 569 ID_VAR_TOKENS.remove(TokenType.UNION) 570 571 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 572 TokenType.ANTI, 573 TokenType.APPLY, 574 TokenType.ASOF, 575 TokenType.FULL, 576 TokenType.LEFT, 577 TokenType.LOCK, 578 TokenType.NATURAL, 579 TokenType.RIGHT, 580 TokenType.SEMI, 581 TokenType.WINDOW, 582 } 583 584 ALIAS_TOKENS = ID_VAR_TOKENS 585 586 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS 587 588 ARRAY_CONSTRUCTORS = { 589 "ARRAY": exp.Array, 590 "LIST": exp.List, 591 } 592 593 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 594 595 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 596 597 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 598 599 FUNC_TOKENS = { 600 TokenType.COLLATE, 601 TokenType.COMMAND, 602 TokenType.CURRENT_DATE, 603 TokenType.CURRENT_DATETIME, 604 TokenType.CURRENT_SCHEMA, 605 TokenType.CURRENT_TIMESTAMP, 606 TokenType.CURRENT_TIME, 607 TokenType.CURRENT_USER, 608 TokenType.FILTER, 609 TokenType.FIRST, 610 TokenType.FORMAT, 611 TokenType.GLOB, 612 TokenType.IDENTIFIER, 613 TokenType.INDEX, 614 TokenType.ISNULL, 615 TokenType.ILIKE, 616 TokenType.INSERT, 617 TokenType.LIKE, 
618 TokenType.MERGE, 619 TokenType.NEXT, 620 TokenType.OFFSET, 621 TokenType.PRIMARY_KEY, 622 TokenType.RANGE, 623 TokenType.REPLACE, 624 TokenType.RLIKE, 625 TokenType.ROW, 626 TokenType.UNNEST, 627 TokenType.VAR, 628 TokenType.LEFT, 629 TokenType.RIGHT, 630 TokenType.SEQUENCE, 631 TokenType.DATE, 632 TokenType.DATETIME, 633 TokenType.TABLE, 634 TokenType.TIMESTAMP, 635 TokenType.TIMESTAMPTZ, 636 TokenType.TRUNCATE, 637 TokenType.WINDOW, 638 TokenType.XOR, 639 *TYPE_TOKENS, 640 *SUBQUERY_PREDICATES, 641 } 642 643 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 644 TokenType.AND: exp.And, 645 } 646 647 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 648 TokenType.COLON_EQ: exp.PropertyEQ, 649 } 650 651 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 652 TokenType.OR: exp.Or, 653 } 654 655 EQUALITY = { 656 TokenType.EQ: exp.EQ, 657 TokenType.NEQ: exp.NEQ, 658 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 659 } 660 661 COMPARISON = { 662 TokenType.GT: exp.GT, 663 TokenType.GTE: exp.GTE, 664 TokenType.LT: exp.LT, 665 TokenType.LTE: exp.LTE, 666 } 667 668 BITWISE = { 669 TokenType.AMP: exp.BitwiseAnd, 670 TokenType.CARET: exp.BitwiseXor, 671 TokenType.PIPE: exp.BitwiseOr, 672 } 673 674 TERM = { 675 TokenType.DASH: exp.Sub, 676 TokenType.PLUS: exp.Add, 677 TokenType.MOD: exp.Mod, 678 TokenType.COLLATE: exp.Collate, 679 } 680 681 FACTOR = { 682 TokenType.DIV: exp.IntDiv, 683 TokenType.LR_ARROW: exp.Distance, 684 TokenType.SLASH: exp.Div, 685 TokenType.STAR: exp.Mul, 686 } 687 688 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 689 690 TIMES = { 691 TokenType.TIME, 692 TokenType.TIMETZ, 693 } 694 695 TIMESTAMPS = { 696 TokenType.TIMESTAMP, 697 TokenType.TIMESTAMPNTZ, 698 TokenType.TIMESTAMPTZ, 699 TokenType.TIMESTAMPLTZ, 700 *TIMES, 701 } 702 703 SET_OPERATIONS = { 704 TokenType.UNION, 705 TokenType.INTERSECT, 706 TokenType.EXCEPT, 707 } 708 709 JOIN_METHODS = { 710 TokenType.ASOF, 711 TokenType.NATURAL, 712 TokenType.POSITIONAL, 713 } 

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    # Empty in this class.
    JOIN_HINTS: t.Set[str] = set()

    # Arrow token -> builder taking (parser, already-parsed expressions).
    LAMBDAS = {
        # `->`: builds exp.Lambda; the body comes from _parse_assignment() and is passed
        # through _replace_lambda together with the parameter expressions.
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        # `=>`: builds exp.Kwarg named after the first expression only.
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }

    # Column operator token -> builder taking (parser, left operand, right operand).
    COLUMN_OPERATORS = {
        # DOT has no builder here (None).
        TokenType.DOT: None,
        TokenType.DOTCOLON: lambda self, this, to: self.expression(
            exp.JSONCast,
            this=this,
            to=to,
        ),
        # `::` builds exp.Cast when STRICT_CAST is truthy, otherwise exp.TryCast.
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        # For ARROW / DARROW the path operand is converted with dialect.to_json_path.
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        # The JSONB variants pass the path operand through unchanged.
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    # Target expression type (or the string "JOIN_TYPE") -> parser entry point.
    # parse_into() looks up its requested type(s) in this table.
    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Hint: lambda self: self._parse_hint_body(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Tuple: lambda self: self._parse_value(values=False),
        exp.Whens: lambda self: self._parse_when_matched(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    # Statement-leading token type -> statement parser.
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.ANALYZE: lambda self: self._parse_analyze(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self._parse_use(),
        # A lone semicolon yields a bare exp.Semicolon node.
        TokenType.SEMICOLON: lambda self: exp.Semicolon(),
    }

    # Prefix operator token type -> builder.
    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        # NOTE: NOT parses its operand with _parse_equality, the others with _parse_unary.
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    # String-like token type -> builder taking (parser, token).
    STRING_PARSERS = {
        # Heredoc strings are built as exp.RawString, same as RAW_STRING below.
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        # `escape` is the parsed string only when the text UESCAPE matches; otherwise it
        # is the falsy result of _match_text_seq.
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    # Numeric-like token type -> builder taking (parser, token).
    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        # `or None` turns a falsy dialect flag into None rather than False.
        TokenType.HEX_STRING: lambda self, token: self.expression(
            exp.HexString,
            this=token.text,
            is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None,
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    # Union of the string and numeric tables plus the remaining primary tokens.
    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    # Placeholder-introducing token type -> builder.
    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        # `:name` becomes a named placeholder only when the next token is in
        # COLON_PLACEHOLDER_TOKENS; otherwise the builder returns None.
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.COLON_PLACEHOLDER_TOKENS)
            else None
        ),
    }

    # Range/predicate token type -> builder taking (parser, left-hand expression).
    # Entries made with binary_range_parser parse the right side with _parse_bitwise
    # and wrap the result with _parse_escape.
    RANGE_PARSERS = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        # Same expression as AT_GT, with the two operands swapped.
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    # Property keyword(s) -> builder. Some builders accept **kwargs and forward them.
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        # DETERMINISTIC produces the same StabilityProperty("IMMUTABLE") as IMMUTABLE below.
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
        "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EMPTY": lambda self: self.expression(exp.EmptyProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        # MULTISET and SET share exp.SetProperty and differ only in `multi`.
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        # `this` is the parsed expression only when the text BY matches.
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SECURITY": lambda self: self._parse_security(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        # USING maps to the same FileFormatProperty as FORMAT above.
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Constraint keyword(s) -> builder.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        # A bare NULL is represented as NotNullColumnConstraint with allow_null=True.
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        # ON UPDATE <function> -> OnUpdateColumnConstraint; otherwise ON <id> -> OnProperty.
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        # Each argument is parsed only when its introducing token (FOR / ALIAS) matches.
        "WATERMARK": lambda self: self.expression(
            exp.WatermarkColumnConstraint,
            this=self._match(TokenType.FOR) and self._parse_column(),
            expression=self._match(TokenType.ALIAS) and self._parse_disjunction(),
        ),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
        "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
        "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
    }

    def _parse_partitioned_by_bucket_or_truncate(self) -> exp.Expression:
        """
        Parses the parenthesized arguments of a `BUCKET(...)` / `TRUNCATE(...)` partition transform.

        The transform kind is decided from the text of the previously consumed token
        (`self._prev`): "BUCKET" (case-insensitive) builds `exp.PartitionedByBucket`,
        anything else builds `exp.PartitionByTruncate`.

        Returns:
            The transform expression; `this` and `expression` are the first two parsed
            arguments, swapped when the first one is a literal (see the comment below).
        """
        klass = (
            exp.PartitionedByBucket
            if self._prev.text.upper() == "BUCKET"
            else exp.PartitionByTruncate
        )

        # Each argument is tried as a primary first and as a column otherwise
        args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column())
        # seq_get is used so that a missing argument yields None instead of raising
        # (presumably; confirm against sqlglot.helper.seq_get)
        this, expression = seq_get(args, 0), seq_get(args, 1)

        if isinstance(this, exp.Literal):
            # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order
            #  - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)`
            #  - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)`
            # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)`
            #
            # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning
            # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
            this, expression = expression, this

        return self.expression(klass, this=this, expression=expression)

    # Keyword following ALTER ... -> builder.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        # The table is parsed only when WITH matches.
        "SWAP": lambda self: self.expression(
            exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
        ),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    # Every name here is also a key of CONSTRAINT_PARSERS above.
    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
        "WATERMARK",
        "BUCKET",
        "TRUNCATE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1187 1188 FUNCTION_PARSERS = { 1189 **{ 1190 name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names() 1191 }, 1192 **{ 1193 name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names() 1194 }, 1195 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1196 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1197 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1198 "DECODE": lambda self: self._parse_decode(), 1199 "EXTRACT": lambda self: self._parse_extract(), 1200 "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1201 "GAP_FILL": lambda self: self._parse_gap_fill(), 1202 "JSON_OBJECT": lambda self: self._parse_json_object(), 1203 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1204 "JSON_TABLE": lambda self: self._parse_json_table(), 1205 "MATCH": lambda self: self._parse_match_against(), 1206 "NORMALIZE": lambda self: self._parse_normalize(), 1207 "OPENJSON": lambda self: self._parse_open_json(), 1208 "OVERLAY": lambda self: self._parse_overlay(), 1209 "POSITION": lambda self: self._parse_position(), 1210 "PREDICT": lambda self: self._parse_predict(), 1211 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1212 "STRING_AGG": lambda self: self._parse_string_agg(), 1213 "SUBSTRING": lambda self: self._parse_substring(), 1214 "TRIM": lambda self: self._parse_trim(), 1215 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1216 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1217 "XMLELEMENT": lambda self: self.expression( 1218 exp.XMLElement, 1219 this=self._match_text_seq("NAME") and self._parse_id_var(), 1220 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1221 ), 1222 "XMLTABLE": lambda self: self._parse_xml_table(), 1223 } 1224 1225 QUERY_MODIFIER_PARSERS = { 1226 TokenType.MATCH_RECOGNIZE: lambda self: ("match", 
self._parse_match_recognize()), 1227 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1228 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1229 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1230 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1231 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1232 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1233 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1234 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1235 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1236 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1237 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1238 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1239 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1240 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1241 TokenType.CLUSTER_BY: lambda self: ( 1242 "cluster", 1243 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1244 ), 1245 TokenType.DISTRIBUTE_BY: lambda self: ( 1246 "distribute", 1247 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1248 ), 1249 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1250 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1251 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1252 } 1253 1254 SET_PARSERS = { 1255 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1256 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1257 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1258 "TRANSACTION": lambda self: self._parse_set_transaction(), 1259 } 1260 1261 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1262 1263 TYPE_LITERAL_PARSERS = { 1264 
exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1265 } 1266 1267 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1268 1269 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1270 1271 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1272 1273 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1274 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1275 "ISOLATION": ( 1276 ("LEVEL", "REPEATABLE", "READ"), 1277 ("LEVEL", "READ", "COMMITTED"), 1278 ("LEVEL", "READ", "UNCOMITTED"), 1279 ("LEVEL", "SERIALIZABLE"), 1280 ), 1281 "READ": ("WRITE", "ONLY"), 1282 } 1283 1284 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1285 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1286 ) 1287 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1288 1289 CREATE_SEQUENCE: OPTIONS_TYPE = { 1290 "SCALE": ("EXTEND", "NOEXTEND"), 1291 "SHARD": ("EXTEND", "NOEXTEND"), 1292 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1293 **dict.fromkeys( 1294 ( 1295 "SESSION", 1296 "GLOBAL", 1297 "KEEP", 1298 "NOKEEP", 1299 "ORDER", 1300 "NOORDER", 1301 "NOCACHE", 1302 "CYCLE", 1303 "NOCYCLE", 1304 "NOMINVALUE", 1305 "NOMAXVALUE", 1306 "NOSCALE", 1307 "NOSHARD", 1308 ), 1309 tuple(), 1310 ), 1311 } 1312 1313 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1314 1315 USABLES: OPTIONS_TYPE = dict.fromkeys( 1316 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1317 ) 1318 1319 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1320 1321 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1322 "TYPE": ("EVOLUTION",), 1323 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1324 } 1325 1326 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1327 1328 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1329 1330 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1331 "NOT": 
("ENFORCED",), 1332 "MATCH": ( 1333 "FULL", 1334 "PARTIAL", 1335 "SIMPLE", 1336 ), 1337 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1338 "USING": ( 1339 "BTREE", 1340 "HASH", 1341 ), 1342 **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()), 1343 } 1344 1345 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1346 1347 CLONE_KEYWORDS = {"CLONE", "COPY"} 1348 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1349 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1350 1351 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1352 1353 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1354 1355 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1356 1357 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1358 1359 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1360 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1361 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1362 1363 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1364 1365 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1366 1367 ADD_CONSTRAINT_TOKENS = { 1368 TokenType.CONSTRAINT, 1369 TokenType.FOREIGN_KEY, 1370 TokenType.INDEX, 1371 TokenType.KEY, 1372 TokenType.PRIMARY_KEY, 1373 TokenType.UNIQUE, 1374 } 1375 1376 DISTINCT_TOKENS = {TokenType.DISTINCT} 1377 1378 NULL_TOKENS = {TokenType.NULL} 1379 1380 UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS 1381 1382 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1383 1384 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1385 1386 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1387 1388 ODBC_DATETIME_LITERALS = { 1389 "d": exp.Date, 1390 "t": exp.Time, 1391 "ts": exp.Timestamp, 1392 } 1393 1394 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1395 1396 PRIVILEGE_FOLLOW_TOKENS = 
{TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1397 1398 # The style options for the DESCRIBE statement 1399 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1400 1401 # The style options for the ANALYZE statement 1402 ANALYZE_STYLES = { 1403 "BUFFER_USAGE_LIMIT", 1404 "FULL", 1405 "LOCAL", 1406 "NO_WRITE_TO_BINLOG", 1407 "SAMPLE", 1408 "SKIP_LOCKED", 1409 "VERBOSE", 1410 } 1411 1412 ANALYZE_EXPRESSION_PARSERS = { 1413 "ALL": lambda self: self._parse_analyze_columns(), 1414 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1415 "DELETE": lambda self: self._parse_analyze_delete(), 1416 "DROP": lambda self: self._parse_analyze_histogram(), 1417 "ESTIMATE": lambda self: self._parse_analyze_statistics(), 1418 "LIST": lambda self: self._parse_analyze_list(), 1419 "PREDICATE": lambda self: self._parse_analyze_columns(), 1420 "UPDATE": lambda self: self._parse_analyze_histogram(), 1421 "VALIDATE": lambda self: self._parse_analyze_validate(), 1422 } 1423 1424 PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"} 1425 1426 AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET) 1427 1428 OPERATION_MODIFIERS: t.Set[str] = set() 1429 1430 RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"} 1431 1432 MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows) 1433 1434 STRICT_CAST = True 1435 1436 PREFIXED_PIVOT_COLUMNS = False 1437 IDENTIFY_PIVOT_STRINGS = False 1438 1439 LOG_DEFAULTS_TO_LN = False 1440 1441 # Whether ADD is present for each column added by ALTER TABLE 1442 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1443 1444 # Whether the table sample clause expects CSV syntax 1445 TABLESAMPLE_CSV = False 1446 1447 # The default method used for table sampling 1448 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1449 1450 # Whether the SET command needs a delimiter (e.g. 
"=") for assignments 1451 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1452 1453 # Whether the TRIM function expects the characters to trim as its first argument 1454 TRIM_PATTERN_FIRST = False 1455 1456 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1457 STRING_ALIASES = False 1458 1459 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1460 MODIFIERS_ATTACHED_TO_SET_OP = True 1461 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1462 1463 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1464 NO_PAREN_IF_COMMANDS = True 1465 1466 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1467 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1468 1469 # Whether the `:` operator is used to extract a value from a VARIANT column 1470 COLON_IS_VARIANT_EXTRACT = False 1471 1472 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1473 # If this is True and '(' is not found, the keyword will be treated as an identifier 1474 VALUES_FOLLOWED_BY_PAREN = True 1475 1476 # Whether implicit unnesting is supported, e.g. 
# SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = True

    # Instance attributes: the four constructor settings plus the per-parse state set by reset()
    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        """
        Creates a parser and initializes its per-parse state through `reset`.

        Args:
            error_level: The error level to use; a falsy value selects ErrorLevel.IMMEDIATE.
            error_message_context: The number of characters of SQL shown on each side of the
                offending token in error messages.
            max_errors: The limit passed to `concat_messages` when recorded errors are raised.
            dialect: The dialect, resolved through `Dialect.get_or_raise`.
        """
        # Imported locally rather than at module level; at module level `Dialect` is only
        # imported under TYPE_CHECKING.
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        """Clears the per-parse state: SQL text, recorded errors, token list and cursor fields."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
"""
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.

        Raises:
            TypeError: If no parser is registered in EXPRESSION_PARSERS for a given type.
            ParseError: If parsing fails for every given type; the errors of all attempts
                are merged into it.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Record which target type this failure belongs to before trying the next one
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Splits the tokens into semicolon-delimited chunks and applies `parse_method` to each.

        Args:
            parse_method: The function called with this parser once per chunk.
            raw_tokens: The list of tokens.
            sql: The original SQL string; an empty string is stored when it is not given.

        Returns:
            One entry per chunk, holding whatever `parse_method` returned for it.
        """
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A semicolon that carries comments becomes a chunk of its own
                if token.comments:
                    chunks.append([token])

                # Open a new chunk unless this semicolon is the last token
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            # Start one position before the chunk so that _advance() lands on its first token
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the chunk was not fully consumed
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        # Fall back to the current token, then the previous one, then an empty string token
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # The ANSI codes \033[4m / \033[0m underline the highlighted token text.
        # NOTE(review): this chunk was whitespace-collapsed on extraction, so the leading
        # whitespace inside the second literal below may be incomplete - confirm against the file.
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f" {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
1675 kwargs: The arguments to set for the expression along with their respective values. 1676 1677 Returns: 1678 The target expression. 1679 """ 1680 instance = exp_class(**kwargs) 1681 instance.add_comments(comments) if comments else self._add_comments(instance) 1682 return self.validate_expression(instance) 1683 1684 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1685 if expression and self._prev_comments: 1686 expression.add_comments(self._prev_comments) 1687 self._prev_comments = None 1688 1689 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1690 """ 1691 Validates an Expression, making sure that all its mandatory arguments are set. 1692 1693 Args: 1694 expression: The expression to validate. 1695 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1696 1697 Returns: 1698 The validated expression. 1699 """ 1700 if self.error_level != ErrorLevel.IGNORE: 1701 for error_message in expression.error_messages(args): 1702 self.raise_error(error_message) 1703 1704 return expression 1705 1706 def _find_sql(self, start: Token, end: Token) -> str: 1707 return self.sql[start.start : end.end + 1] 1708 1709 def _is_connected(self) -> bool: 1710 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1711 1712 def _advance(self, times: int = 1) -> None: 1713 self._index += times 1714 self._curr = seq_get(self._tokens, self._index) 1715 self._next = seq_get(self._tokens, self._index + 1) 1716 1717 if self._index > 0: 1718 self._prev = self._tokens[self._index - 1] 1719 self._prev_comments = self._prev.comments 1720 else: 1721 self._prev = None 1722 self._prev_comments = None 1723 1724 def _retreat(self, index: int) -> None: 1725 if index != self._index: 1726 self._advance(index - self._index) 1727 1728 def _warn_unsupported(self) -> None: 1729 if len(self._tokens) <= 1: 1730 return 1731 1732 # We use _find_sql because self.sql may comprise multiple chunks, 
# and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        """
        Builds an exp.Command from the previously consumed token (its upper-cased text
        becomes `this`) and the result of `_parse_string`, after logging a warning.
        """
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly
        """
        index = self._index
        error_level = self.error_level

        # Force errors to be raised immediately so that a failure is observable here
        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            # Rewind on failure (or falsy result), or whenever the caller asked for it
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """
        Parses the target and text of a comment statement into an exp.Comment.

        The accepted sequence is: optional IF EXISTS (when `allow_exists`), optional ON,
        optional MATERIALIZED, a token from CREATABLES, the commented object, optional IS
        and the result of `_parse_string`. If no CREATABLES token is found, the statement
        is parsed as a command starting from the token preceding this call.
        """
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        # The way the commented object is parsed depends on its kind
        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        """Parses table parts (with schema=True) and wraps them in an exp.ToTableProperty."""
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """
        Parses a comma-separated list of TTL actions, followed by whatever `_parse_where`
        and `_parse_group` accept, into an exp.MergeTreeTTL. SET aggregates are only
        parsed when a group was found.
        """

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # An expression optionally followed by DELETE, RECOMPRESS <expr>,
            # TO DISK <string> or TO VOLUME <string>; without one, the bare expression is returned
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """
        Parses a single statement.

        Dispatch order: a token registered in STATEMENT_PARSERS, then a token in the
        dialect tokenizer's COMMANDS, and otherwise an expression (with set operations)
        or a select, followed by query modifiers. Returns None when there is no current token.
        """
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            # Capture the keyword's comments before the statement parser consumes more tokens
            comments = self._prev_comments
            stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
            stmt.add_comments(comments,
prepend=True)
            return stmt

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        """
        Parses the remainder of a DROP statement into an exp.Drop.

        Args:
            exists: Whether the existence check is already known to be present; when falsy,
                it is looked for with `_parse_exists`.

        Returns:
            The exp.Drop node, or a command when no CREATABLES token follows.
        """
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()
        if not kind:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        if_exists = exists or self._parse_exists()

        if kind == "COLUMN":
            this = self._parse_column()
        else:
            # NOTE(review): self._prev is read here after CONCURRENTLY / IF EXISTS may have been
            # consumed, so it is not necessarily the kind token any more - confirm this is intended.
            this = self._parse_table_parts(
                schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
            )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        # The keyword arguments below are evaluated in order, so CASCADE, CONSTRAINTS
        # and PURGE are matched in exactly that sequence
        return self.expression(
            exp.Drop,
            exists=if_exists,
            this=this,
            expressions=expressions,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
            concurrently=concurrently,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        """
        Matches IF EXISTS, or IF NOT EXISTS when `not_` is True.

        Returns:
            A truthy value if the whole sequence was matched, a falsy one otherwise.
        """
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        """
        Parses the remainder of a CREATE (or REPLACE) statement into an exp.Create.

        The statement is parsed as a command instead when no creatable kind can be
        determined, or when tokens other than `)` or `,` are left over after parsing.
        """
        # Note: this can't be None because we've matched a statement parser
        start = self._prev

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )
        refresh = self._match_pair(TokenType.OR, TokenType.REFRESH)

        unique = self._match(TokenType.UNIQUE)

        # Tri-state: True for CLUSTERED COLUMNSTORE, False for [NONCLUSTERED] COLUMNSTORE,
        # None when neither is present
        if self._match_text_seq("CLUSTERED", "COLUMNSTORE"):
            clustered = True
        elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq(
            "COLUMNSTORE"
        ):
            clustered = False
        else:
            clustered = None

        # For TABLE FUNCTION, skip TABLE so that FUNCTION is picked up as the creatable kind
        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulates properties found at different locations into a single Properties node
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_user_defined_function_expression()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            has_alias = self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

                # Some dialects also support using a table as an alias instead of a SELECT.
                # Here we fallback to this as an alternative.
                if not expression and has_alias:
                    expression = self._try_parse(self._parse_table_parts)

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())
                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True
            elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE):
                extend_props(self._parse_properties())

            shallow = self._match_text_seq("SHALLOW")

            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self.expression(
                    exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
                )

        # Anything left over other than ")" or "," means the statement wasn't fully understood
        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            return self._parse_as_command(start)

        create_kind_text = create_token.text.upper()
        return self.expression(
            exp.Create,
            this=this,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text,
            replace=replace,
            refresh=refresh,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
            concurrently=concurrently,
            clustered=clustered,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        """
        Parses sequence options (INCREMENT, MINVALUE, MAXVALUE, START, CACHE, OWNED BY and
        the options in CREATE_SEQUENCE) into an exp.SequenceProperties.

        Returns:
            The properties node, or None if no token was consumed.
        """
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            self._match(TokenType.COMMA)
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        # An unchanged cursor means nothing sequence-related was found
        return None if self._index == index else seq

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        """
        Parses a property that may be preceded by modifier keywords (NO, DUAL, BEFORE,
        DEFAULT, [NOT] LOCAL, AFTER, MIN[IMUM], MAX[IMUM]). The matched modifiers are
        forwarded as keyword arguments to the property's parser.
        """
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # Dict values are evaluated in order, so the modifiers are matched in this sequence
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k:
v for k, v in kwargs.items() if v})
            except TypeError:
                # The property parser doesn't accept one of the matched modifiers
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
        """Parses a wrapped, comma-separated list of properties."""
        return self._parse_wrapped_csv(self._parse_property)

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """
        Parses a single property.

        Tried in order: a keyword registered in PROPERTY_PARSERS, DEFAULT followed by such
        a keyword, COMPOUND SORTKEY, SQL SECURITY, and finally a generic `key = value`
        pair. If no `=` follows the key, the cursor is rewound and sequence properties
        are attempted instead.
        """
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return self._parse_sequence_properties()

        # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise
        if isinstance(key, exp.Column):
            key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name)

        value = self._parse_bitwise() or self._parse_var(any_token=True)

        # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier())
        if isinstance(value, exp.Column):
            value = exp.var(value.name)

        return self.expression(exp.Property, this=key, value=value)

    def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]:
        """
        Parses what follows a STORED keyword: either BY <handler> (storage handler), or an
        optional AS followed by INPUTFORMAT/OUTPUTFORMAT strings or a single format value.
        """
        if self._match_text_seq("BY"):
            return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string())

        self._match(TokenType.ALIAS)
        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat,
                    input_format=input_format,
                    output_format=output_format,
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
        )

    def _parse_unquoted_field(self) -> t.Optional[exp.Expression]:
        """Parses a field, converting an unquoted identifier into an exp.Var."""
        field = self._parse_field()
        if isinstance(field, exp.Identifier) and not field.quoted:
            field = exp.var(field)

        return field

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        """
        Parses `[=] [AS] <field>` and builds `exp_class` with the field as `this`;
        extra keyword arguments are forwarded to the expression.
        """
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)

        return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """
        Parses consecutive properties until one fails to parse.

        Args:
            before: Whether to use `_parse_property_before` instead of `_parse_property`.

        Returns:
            An exp.Properties node, or None if no property was found.
        """
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()
            if not prop:
                break
            # A property parser may return a single expression or a list of them
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        """Builds an exp.FallbackProperty, recording whether PROTECTION follows."""
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_security(self) -> t.Optional[exp.SecurityProperty]:
        """Parses DEFINER or INVOKER into an exp.SecurityProperty; None if neither follows."""
        if self._match_texts(("DEFINER", "INVOKER")):
            security_specifier = self._prev.text.upper()
            return self.expression(exp.SecurityProperty, this=security_specifier)
        return None

    def _parse_settings_property(self) -> exp.SettingsProperty:
        """Parses a comma-separated list of assignments into an exp.SettingsProperty."""
        return self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment)
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        """
        Decides what a VOLATILE keyword means based on the token two positions before
        the cursor: a volatile property if that token is in PRE_VOLATILE_TOKENS,
        otherwise a stability property with the value "VOLATILE".
        """
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_retention_period(self) -> exp.Var:
        """Parses an optional number followed by a unit token into a single exp.Var."""
        # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        unit = self._parse_var(any_token=True)
        return exp.var(f"{number_str}{unit}")

    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        """
        Parses `[=] {OFF | [ON] [(HISTORY_TABLE = ..., DATA_CONSISTENCY_CHECK = ...,
        HISTORY_RETENTION_PERIOD = ...)]}` into an exp.WithSystemVersioningProperty.

        Args:
            with_: The value stored under the property's "with" argument.
        """
        self._match(TokenType.EQ)
        # The arguments are passed through a dict because "with" is a Python keyword
        prop = self.expression(
            exp.WithSystemVersioningProperty,
            **{  # type: ignore
                "on": True,
                "with": with_,
            },
        )

        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
        """
        Parses `[=] [ON | OFF] [(FILTER_COLUMN = ..., RETENTION_PERIOD = ...)]` into an
        exp.DataDeletionProperty. The property is on unless OFF is matched.
        """
        self._match(TokenType.EQ)
        on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
        prop = self.expression(exp.DataDeletionProperty, on=on)

        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILTER_COLUMN", "="):
                    prop.set("filter_column", self._parse_column())
                elif self._match_text_seq("RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_distributed_property(self) -> exp.DistributedByProperty:
        """
        Parses `[BY HASH (<ids>) | BY RANDOM] [BUCKETS {AUTO | <number>}]` plus whatever
        `_parse_order` accepts into an exp.DistributedByProperty. The kind defaults to "HASH".
        """
        kind = "HASH"
        expressions: t.Optional[t.List[exp.Expression]] = None
        if self._match_text_seq("BY", "HASH"):
            expressions = self._parse_wrapped_csv(self._parse_id_var)
        elif self._match_text_seq("BY", "RANDOM"):
            kind = "RANDOM"

        # If the BUCKETS keyword is not present, the number of buckets is AUTO
        buckets: t.Optional[exp.Expression] = None
        if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"):
            buckets = self._parse_number()

        return self.expression(
            exp.DistributedByProperty,
            expressions=expressions,
            kind=kind,
            buckets=buckets,
            order=self._parse_order(),
        )

    def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E:
        """Parses `[KEY] (<ids>)` and builds `expr_type` with the ids as its expressions."""
        self._match_text_seq("KEY")
        expressions = self._parse_wrapped_id_vars()
        return self.expression(expr_type, expressions=expressions)

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        """
        Parses what follows a WITH keyword in a property context.

        The alternatives are tried in the order written below; the first one that
        matches determines the result. Returns None when nothing matches and there is
        no next token.
        """
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if self._match(TokenType.SCHEMA):
            return self.expression(
                exp.WithSchemaBindingProperty,
                this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS),
            )

        if self._match_texts(self.PROCEDURE_OPTIONS, advance=False):
            return self.expression(
                exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option)
            )

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    def _parse_procedure_option(self) -> exp.Expression | None:
        """
        Parses one procedure option: `EXECUTE AS <option or string>` or one of the
        options in PROCEDURE_OPTIONS.
        """
        if self._match_text_seq("EXECUTE", "AS"):
            return self.expression(
                exp.ExecuteAsProperty,
                this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False)
                or self._parse_string(),
            )

        return self._parse_var_from_options(self.PROCEDURE_OPTIONS)

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        """
        Parses `[=] <user>@<host>` into an exp.DefinerProperty whose value is the string
        "<user>@<host>". Returns None if either part is missing.
        """
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        # The host is either an identifier or a MOD token, whose text is used as-is
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        """Parses `[TABLE] [=] <table>` into an exp.WithJournalTableProperty."""
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        """Builds an exp.LogProperty with the given `no` flag; consumes no tokens."""
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        """Builds an exp.JournalProperty from the given keyword arguments; consumes no tokens."""
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        """
        Parses `[=] [ON | OFF] [DEFAULT]` into an exp.ChecksumProperty. `on` is None
        when neither ON nor OFF is present.
        """
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        """
        Parses a comma-separated list of ordered expressions into an exp.Cluster.

        Args:
            wrapped: Whether the list is parsed with `_parse_wrapped_csv` instead of `_parse_csv`.
        """
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        """
        Parses `[BY] (<columns>) [SORTED BY (<ordered>)] [INTO] <number> [BUCKETS]` into
        an exp.ClusteredByProperty.
        """
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        """
        Parses GRANTS into an exp.CopyGrantsProperty. If GRANTS doesn't follow, the
        cursor is moved back by one token and None is returned.
        """
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        """Parses `[=] <number> [PERCENT]` into an exp.FreespaceProperty."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        """
        Parses `= <number> [PERCENT]` into an exp.MergeBlockRatioProperty; without `=`,
        the property is built from the `no` and `default` flags instead.
        """
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
2475 ) 2476 2477 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2478 2479 def _parse_datablocksize( 2480 self, 2481 default: t.Optional[bool] = None, 2482 minimum: t.Optional[bool] = None, 2483 maximum: t.Optional[bool] = None, 2484 ) -> exp.DataBlocksizeProperty: 2485 self._match(TokenType.EQ) 2486 size = self._parse_number() 2487 2488 units = None 2489 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2490 units = self._prev.text 2491 2492 return self.expression( 2493 exp.DataBlocksizeProperty, 2494 size=size, 2495 units=units, 2496 default=default, 2497 minimum=minimum, 2498 maximum=maximum, 2499 ) 2500 2501 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2502 self._match(TokenType.EQ) 2503 always = self._match_text_seq("ALWAYS") 2504 manual = self._match_text_seq("MANUAL") 2505 never = self._match_text_seq("NEVER") 2506 default = self._match_text_seq("DEFAULT") 2507 2508 autotemp = None 2509 if self._match_text_seq("AUTOTEMP"): 2510 autotemp = self._parse_schema() 2511 2512 return self.expression( 2513 exp.BlockCompressionProperty, 2514 always=always, 2515 manual=manual, 2516 never=never, 2517 default=default, 2518 autotemp=autotemp, 2519 ) 2520 2521 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2522 index = self._index 2523 no = self._match_text_seq("NO") 2524 concurrent = self._match_text_seq("CONCURRENT") 2525 2526 if not self._match_text_seq("ISOLATED", "LOADING"): 2527 self._retreat(index) 2528 return None 2529 2530 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2531 return self.expression( 2532 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2533 ) 2534 2535 def _parse_locking(self) -> exp.LockingProperty: 2536 if self._match(TokenType.TABLE): 2537 kind = "TABLE" 2538 elif self._match(TokenType.VIEW): 2539 kind = "VIEW" 2540 elif self._match(TokenType.ROW): 2541 kind = "ROW" 2542 elif 
def _parse_partition_by(self) -> t.List[exp.Expression]:
    """Parse an optional ``PARTITION BY`` clause.

    Returns:
        The comma-separated expressions following the ``PARTITION_BY`` token, or an
        empty list when that token is not next.
    """
    if not self._match(TokenType.PARTITION_BY):
        return []

    return self._parse_csv(self._parse_assignment)
self._parse_wrapped_csv(_parse_partition_bound_expr) 2609 elif self._match_text_seq("WITH", "(", "MODULUS"): 2610 this = self._parse_number() 2611 self._match_text_seq(",", "REMAINDER") 2612 expression = self._parse_number() 2613 self._match_r_paren() 2614 else: 2615 self.raise_error("Failed to parse partition bound spec.") 2616 2617 return self.expression( 2618 exp.PartitionBoundSpec, 2619 this=this, 2620 expression=expression, 2621 from_expressions=from_expressions, 2622 to_expressions=to_expressions, 2623 ) 2624 2625 # https://www.postgresql.org/docs/current/sql-createtable.html 2626 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2627 if not self._match_text_seq("OF"): 2628 self._retreat(self._index - 1) 2629 return None 2630 2631 this = self._parse_table(schema=True) 2632 2633 if self._match(TokenType.DEFAULT): 2634 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2635 elif self._match_text_seq("FOR", "VALUES"): 2636 expression = self._parse_partition_bound_spec() 2637 else: 2638 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2639 2640 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2641 2642 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2643 self._match(TokenType.EQ) 2644 return self.expression( 2645 exp.PartitionedByProperty, 2646 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2647 ) 2648 2649 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2650 if self._match_text_seq("AND", "STATISTICS"): 2651 statistics = True 2652 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2653 statistics = False 2654 else: 2655 statistics = None 2656 2657 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2658 2659 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2660 if self._match_text_seq("SQL"): 2661 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS 
SQL") 2662 return None 2663 2664 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2665 if self._match_text_seq("SQL", "DATA"): 2666 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2667 return None 2668 2669 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2670 if self._match_text_seq("PRIMARY", "INDEX"): 2671 return exp.NoPrimaryIndexProperty() 2672 if self._match_text_seq("SQL"): 2673 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2674 return None 2675 2676 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2677 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2678 return exp.OnCommitProperty() 2679 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2680 return exp.OnCommitProperty(delete=True) 2681 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2682 2683 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2684 if self._match_text_seq("SQL", "DATA"): 2685 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2686 return None 2687 2688 def _parse_distkey(self) -> exp.DistKeyProperty: 2689 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2690 2691 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2692 table = self._parse_table(schema=True) 2693 2694 options = [] 2695 while self._match_texts(("INCLUDING", "EXCLUDING")): 2696 this = self._prev.text.upper() 2697 2698 id_var = self._parse_id_var() 2699 if not id_var: 2700 return None 2701 2702 options.append( 2703 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2704 ) 2705 2706 return self.expression(exp.LikeProperty, this=table, expressions=options) 2707 2708 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2709 return self.expression( 2710 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2711 ) 2712 2713 def 
def _parse_returns(self) -> exp.ReturnsProperty:
    """Parse what follows ``RETURNS``.

    Three shapes are handled:

    * ``TABLE <...>`` (struct-style field list between ``<`` and ``>``) or ``TABLE``
      followed by a schema,
    * ``NULL ON NULL INPUT``,
    * otherwise a data type.

    Returns:
        An ``exp.ReturnsProperty`` with the parsed value, whether ``TABLE`` was
        matched, and ``null=True`` only for ``NULL ON NULL INPUT``.
    """
    returns_table = self._match(TokenType.TABLE)
    null_on_null = None
    returned: t.Optional[exp.Expression] = None

    if not returns_table:
        if self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null_on_null = True
        else:
            returned = self._parse_types()
    elif self._match(TokenType.LT):
        fields = self._parse_csv(self._parse_struct_types)
        returned = self.expression(exp.Schema, this="TABLE", expressions=fields)

        if not self._match(TokenType.GT):
            self.raise_error("Expecting >")
    else:
        returned = self._parse_schema(exp.var("TABLE"))

    return self.expression(
        exp.ReturnsProperty, this=returned, is_table=returns_table, null=null_on_null
    )
def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts:
    """Parse the branches of a multi-table insert, then its source table.

    The upper-cased text of the previously consumed token is stored as ``kind``
    (the visible caller in this file reaches here right after matching FIRST or ALL).

    Args:
        comments: Comments to attach to the resulting node.

    Returns:
        An ``exp.MultitableInserts`` holding every ``[WHEN <cond> THEN] [ELSE] INTO ...``
        branch that could be parsed, followed by the parsed source table.
    """
    kind = self._prev.text.upper()

    def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]:
        # Optional guard: WHEN <condition> [THEN]
        condition = None
        if self._match(TokenType.WHEN):
            condition = self._parse_disjunction()
            self._match(TokenType.THEN)

        else_ = self._match(TokenType.ELSE)

        # Without INTO there is no further branch
        if not self._match(TokenType.INTO):
            return None

        target = self._parse_table(schema=True)
        values = self._parse_derived_table_values()
        insert = self.expression(exp.Insert, this=target, expression=values)

        return self.expression(
            exp.ConditionalInsert, this=insert, expression=condition, else_=else_
        )

    branches = []
    while True:
        branch = parse_conditional_insert()
        if branch is None:
            break
        branches.append(branch)

    source = self._parse_table()
    return self.expression(
        exp.MultitableInserts,
        kind=kind,
        comments=comments,
        expressions=branches,
        source=source,
    )
def _parse_kill(self) -> exp.Kill:
    """Parse ``[CONNECTION | QUERY] <primary expression>`` into an ``exp.Kill``.

    ``kind`` keeps the keyword text exactly as written in the input and is ``None``
    when neither keyword is present.
    """
    kind = None
    if self._match_texts(("CONNECTION", "QUERY")):
        kind = exp.var(self._prev.text)

    target = self._parse_primary()
    return self.expression(exp.Kill, this=target, kind=kind)
def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
    """Parse ``[WITH] SERDEPROPERTIES (...)``.

    Args:
        with_: Set when the caller already knows the clause is introduced by ``WITH``;
            if so, no attempt is made to match the keyword here.

    Returns:
        An ``exp.SerdeProperties`` with the wrapped properties and the ``with`` flag,
        or ``None`` (after restoring the entry position) when the ``SERDE_PROPERTIES``
        token is not found.
    """
    start = self._index
    with_ = with_ or self._match_text_seq("WITH")

    if self._match(TokenType.SERDE_PROPERTIES):
        # "with" is a Python keyword, so the arguments are passed through a dict
        kwargs = {"expressions": self._parse_wrapped_properties(), "with": with_}
        return self.expression(exp.SerdeProperties, **kwargs)  # type: ignore

    self._retreat(start)
    return None
serde_properties = self._parse_serde_properties() 2958 2959 return self.expression( 2960 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2961 ) 2962 2963 self._match_text_seq("DELIMITED") 2964 2965 kwargs = {} 2966 2967 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2968 kwargs["fields"] = self._parse_string() 2969 if self._match_text_seq("ESCAPED", "BY"): 2970 kwargs["escaped"] = self._parse_string() 2971 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2972 kwargs["collection_items"] = self._parse_string() 2973 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2974 kwargs["map_keys"] = self._parse_string() 2975 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2976 kwargs["lines"] = self._parse_string() 2977 if self._match_text_seq("NULL", "DEFINED", "AS"): 2978 kwargs["null"] = self._parse_string() 2979 2980 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2981 2982 def _parse_load(self) -> exp.LoadData | exp.Command: 2983 if self._match_text_seq("DATA"): 2984 local = self._match_text_seq("LOCAL") 2985 self._match_text_seq("INPATH") 2986 inpath = self._parse_string() 2987 overwrite = self._match(TokenType.OVERWRITE) 2988 self._match_pair(TokenType.INTO, TokenType.TABLE) 2989 2990 return self.expression( 2991 exp.LoadData, 2992 this=self._parse_table(schema=True), 2993 local=local, 2994 overwrite=overwrite, 2995 inpath=inpath, 2996 partition=self._parse_partition(), 2997 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2998 serde=self._match_text_seq("SERDE") and self._parse_string(), 2999 ) 3000 return self._parse_as_command(self._prev) 3001 3002 def _parse_delete(self) -> exp.Delete: 3003 # This handles MySQL's "Multiple-Table Syntax" 3004 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3005 tables = None 3006 if not self._match(TokenType.FROM, advance=False): 3007 tables = self._parse_csv(self._parse_table) or None 3008 3009 
def _parse_update(self) -> exp.Update:
    """Parse the body of an UPDATE statement into an ``exp.Update``.

    Order of parsing: target table (with joins), ``SET`` assignments, an optional
    RETURNING clause, then FROM, WHERE, RETURNING again (only if none was found
    earlier), ORDER and LIMIT.
    """
    target = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
    assignments = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
    returning = self._parse_returning()

    # "from" is a Python keyword, hence the dict instead of keyword arguments
    kwargs = {"this": target, "expressions": assignments}
    kwargs["from"] = self._parse_from(joins=True)
    kwargs["where"] = self._parse_where()
    kwargs["returning"] = returning or self._parse_returning()
    kwargs["order"] = self._parse_order()
    kwargs["limit"] = self._parse_limit()

    return self.expression(exp.Update, **kwargs)  # type: ignore
3070 exp.Cache, 3071 this=table, 3072 lazy=lazy, 3073 options=options, 3074 expression=self._parse_select(nested=True), 3075 ) 3076 3077 def _parse_partition(self) -> t.Optional[exp.Partition]: 3078 if not self._match_texts(self.PARTITION_KEYWORDS): 3079 return None 3080 3081 return self.expression( 3082 exp.Partition, 3083 subpartition=self._prev.text.upper() == "SUBPARTITION", 3084 expressions=self._parse_wrapped_csv(self._parse_assignment), 3085 ) 3086 3087 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3088 def _parse_value_expression() -> t.Optional[exp.Expression]: 3089 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3090 return exp.var(self._prev.text.upper()) 3091 return self._parse_expression() 3092 3093 if self._match(TokenType.L_PAREN): 3094 expressions = self._parse_csv(_parse_value_expression) 3095 self._match_r_paren() 3096 return self.expression(exp.Tuple, expressions=expressions) 3097 3098 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
3099 expression = self._parse_expression() 3100 if expression: 3101 return self.expression(exp.Tuple, expressions=[expression]) 3102 return None 3103 3104 def _parse_projections(self) -> t.List[exp.Expression]: 3105 return self._parse_expressions() 3106 3107 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3108 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3109 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3110 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3111 ) 3112 elif self._match(TokenType.FROM): 3113 from_ = self._parse_from(skip_from_token=True) 3114 # Support parentheses for duckdb FROM-first syntax 3115 select = self._parse_select() 3116 if select: 3117 select.set("from", from_) 3118 this = select 3119 else: 3120 this = exp.select("*").from_(t.cast(exp.From, from_)) 3121 else: 3122 this = ( 3123 self._parse_table() 3124 if table 3125 else self._parse_select(nested=True, parse_set_operation=False) 3126 ) 3127 3128 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3129 # in case a modifier (e.g. 
join) is following 3130 if table and isinstance(this, exp.Values) and this.alias: 3131 alias = this.args["alias"].pop() 3132 this = exp.Table(this=this, alias=alias) 3133 3134 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3135 3136 return this 3137 3138 def _parse_select( 3139 self, 3140 nested: bool = False, 3141 table: bool = False, 3142 parse_subquery_alias: bool = True, 3143 parse_set_operation: bool = True, 3144 ) -> t.Optional[exp.Expression]: 3145 cte = self._parse_with() 3146 3147 if cte: 3148 this = self._parse_statement() 3149 3150 if not this: 3151 self.raise_error("Failed to parse any statement following CTE") 3152 return cte 3153 3154 if "with" in this.arg_types: 3155 this.set("with", cte) 3156 else: 3157 self.raise_error(f"{this.key} does not support CTE") 3158 this = cte 3159 3160 return this 3161 3162 # duckdb supports leading with FROM x 3163 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 3164 3165 if self._match(TokenType.SELECT): 3166 comments = self._prev_comments 3167 3168 hint = self._parse_hint() 3169 3170 if self._next and not self._next.token_type == TokenType.DOT: 3171 all_ = self._match(TokenType.ALL) 3172 distinct = self._match_set(self.DISTINCT_TOKENS) 3173 else: 3174 all_, distinct = None, None 3175 3176 kind = ( 3177 self._match(TokenType.ALIAS) 3178 and self._match_texts(("STRUCT", "VALUE")) 3179 and self._prev.text.upper() 3180 ) 3181 3182 if distinct: 3183 distinct = self.expression( 3184 exp.Distinct, 3185 on=self._parse_value(values=False) if self._match(TokenType.ON) else None, 3186 ) 3187 3188 if all_ and distinct: 3189 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3190 3191 operation_modifiers = [] 3192 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3193 operation_modifiers.append(exp.var(self._prev.text.upper())) 3194 3195 limit = self._parse_limit(top=True) 3196 projections = self._parse_projections() 3197 3198 this = 
self.expression( 3199 exp.Select, 3200 kind=kind, 3201 hint=hint, 3202 distinct=distinct, 3203 expressions=projections, 3204 limit=limit, 3205 operation_modifiers=operation_modifiers or None, 3206 ) 3207 this.comments = comments 3208 3209 into = self._parse_into() 3210 if into: 3211 this.set("into", into) 3212 3213 if not from_: 3214 from_ = self._parse_from() 3215 3216 if from_: 3217 this.set("from", from_) 3218 3219 this = self._parse_query_modifiers(this) 3220 elif (table or nested) and self._match(TokenType.L_PAREN): 3221 this = self._parse_wrapped_select(table=table) 3222 3223 # We return early here so that the UNION isn't attached to the subquery by the 3224 # following call to _parse_set_operations, but instead becomes the parent node 3225 self._match_r_paren() 3226 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3227 elif self._match(TokenType.VALUES, advance=False): 3228 this = self._parse_derived_table_values() 3229 elif from_: 3230 this = exp.select("*").from_(from_.this, copy=False) 3231 elif self._match(TokenType.SUMMARIZE): 3232 table = self._match(TokenType.TABLE) 3233 this = self._parse_select() or self._parse_string() or self._parse_table() 3234 return self.expression(exp.Summarize, this=this, table=table) 3235 elif self._match(TokenType.DESCRIBE): 3236 this = self._parse_describe() 3237 elif self._match_text_seq("STREAM"): 3238 this = self._parse_function() 3239 if this: 3240 this = self.expression(exp.Stream, this=this) 3241 else: 3242 self._retreat(self._index - 1) 3243 else: 3244 this = None 3245 3246 return self._parse_set_operations(this) if parse_set_operation else this 3247 3248 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3249 self._match_text_seq("SEARCH") 3250 3251 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3252 3253 if not kind: 3254 return None 3255 3256 self._match_text_seq("FIRST", "BY") 3257 3258 return self.expression( 3259 
def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
    """Parse a ``WITH [RECURSIVE] <cte> [, <cte>]...`` clause.

    Args:
        skip_with_token: Set when the caller has already consumed ``WITH``.

    Returns:
        An ``exp.With`` holding every parsed CTE, or ``None`` when ``WITH`` is required
        but absent. Comments seen on a separator are attached to the CTE after it.
    """
    if not (skip_with_token or self._match(TokenType.WITH)):
        return None

    with_comments = self._prev_comments
    is_recursive = self._match(TokenType.RECURSIVE)

    ctes = []
    pending_comments = None

    while True:
        cte = self._parse_cte()
        if isinstance(cte, exp.CTE):
            ctes.append(cte)
            if pending_comments:
                cte.add_comments(pending_comments)

        # The list continues after either a comma or a repeated WITH
        if not (self._match(TokenType.COMMA) or self._match(TokenType.WITH)):
            break

        # Also swallow a WITH that directly follows the comma
        self._match(TokenType.WITH)
        pending_comments = self._prev_comments

    search = self._parse_recursive_with_search()
    return self.expression(
        exp.With,
        comments=with_comments,
        expressions=ctes,
        recursive=is_recursive,
        search=search,
    )
cte 3329 3330 def _parse_table_alias( 3331 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3332 ) -> t.Optional[exp.TableAlias]: 3333 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3334 # so this section tries to parse the clause version and if it fails, it treats the token 3335 # as an identifier (alias) 3336 if self._can_parse_limit_or_offset(): 3337 return None 3338 3339 any_token = self._match(TokenType.ALIAS) 3340 alias = ( 3341 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3342 or self._parse_string_as_identifier() 3343 ) 3344 3345 index = self._index 3346 if self._match(TokenType.L_PAREN): 3347 columns = self._parse_csv(self._parse_function_parameter) 3348 self._match_r_paren() if columns else self._retreat(index) 3349 else: 3350 columns = None 3351 3352 if not alias and not columns: 3353 return None 3354 3355 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3356 3357 # We bubble up comments from the Identifier to the TableAlias 3358 if isinstance(alias, exp.Identifier): 3359 table_alias.add_comments(alias.pop_comments()) 3360 3361 return table_alias 3362 3363 def _parse_subquery( 3364 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3365 ) -> t.Optional[exp.Subquery]: 3366 if not this: 3367 return None 3368 3369 return self.expression( 3370 exp.Subquery, 3371 this=this, 3372 pivots=self._parse_pivots(), 3373 alias=self._parse_table_alias() if parse_alias else None, 3374 sample=self._parse_table_sample(), 3375 ) 3376 3377 def _implicit_unnests_to_explicit(self, this: E) -> E: 3378 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3379 3380 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3381 for i, join in enumerate(this.args.get("joins") or []): 3382 table = join.this 3383 normalized_table = table.copy() 3384 normalized_table.meta["maybe_column"] = True 
3385 normalized_table = _norm(normalized_table, dialect=self.dialect) 3386 3387 if isinstance(table, exp.Table) and not join.args.get("on"): 3388 if normalized_table.parts[0].name in refs: 3389 table_as_column = table.to_column() 3390 unnest = exp.Unnest(expressions=[table_as_column]) 3391 3392 # Table.to_column creates a parent Alias node that we want to convert to 3393 # a TableAlias and attach to the Unnest, so it matches the parser's output 3394 if isinstance(table.args.get("alias"), exp.TableAlias): 3395 table_as_column.replace(table_as_column.this) 3396 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3397 3398 table.replace(unnest) 3399 3400 refs.add(normalized_table.alias_or_name) 3401 3402 return this 3403 3404 def _parse_query_modifiers( 3405 self, this: t.Optional[exp.Expression] 3406 ) -> t.Optional[exp.Expression]: 3407 if isinstance(this, self.MODIFIABLES): 3408 for join in self._parse_joins(): 3409 this.append("joins", join) 3410 for lateral in iter(self._parse_lateral, None): 3411 this.append("laterals", lateral) 3412 3413 while True: 3414 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3415 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3416 key, expression = parser(self) 3417 3418 if expression: 3419 this.set(key, expression) 3420 if key == "limit": 3421 offset = expression.args.pop("offset", None) 3422 3423 if offset: 3424 offset = exp.Offset(expression=offset) 3425 this.set("offset", offset) 3426 3427 limit_by_expressions = expression.expressions 3428 expression.set("expressions", None) 3429 offset.set("expressions", limit_by_expressions) 3430 continue 3431 break 3432 3433 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3434 this = self._implicit_unnests_to_explicit(this) 3435 3436 return this 3437 3438 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3439 start = self._curr 3440 while self._curr: 3441 self._advance() 3442 3443 end = self._tokens[self._index 
- 1] 3444 return exp.Hint(expressions=[self._find_sql(start, end)]) 3445 3446 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3447 return self._parse_function_call() 3448 3449 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3450 start_index = self._index 3451 should_fallback_to_string = False 3452 3453 hints = [] 3454 try: 3455 for hint in iter( 3456 lambda: self._parse_csv( 3457 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3458 ), 3459 [], 3460 ): 3461 hints.extend(hint) 3462 except ParseError: 3463 should_fallback_to_string = True 3464 3465 if should_fallback_to_string or self._curr: 3466 self._retreat(start_index) 3467 return self._parse_hint_fallback_to_string() 3468 3469 return self.expression(exp.Hint, expressions=hints) 3470 3471 def _parse_hint(self) -> t.Optional[exp.Hint]: 3472 if self._match(TokenType.HINT) and self._prev_comments: 3473 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3474 3475 return None 3476 3477 def _parse_into(self) -> t.Optional[exp.Into]: 3478 if not self._match(TokenType.INTO): 3479 return None 3480 3481 temp = self._match(TokenType.TEMPORARY) 3482 unlogged = self._match_text_seq("UNLOGGED") 3483 self._match(TokenType.TABLE) 3484 3485 return self.expression( 3486 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3487 ) 3488 3489 def _parse_from( 3490 self, joins: bool = False, skip_from_token: bool = False 3491 ) -> t.Optional[exp.From]: 3492 if not skip_from_token and not self._match(TokenType.FROM): 3493 return None 3494 3495 return self.expression( 3496 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3497 ) 3498 3499 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3500 return self.expression( 3501 exp.MatchRecognizeMeasure, 3502 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3503 this=self._parse_expression(), 3504 ) 3505 3506 
def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3507 if not self._match(TokenType.MATCH_RECOGNIZE): 3508 return None 3509 3510 self._match_l_paren() 3511 3512 partition = self._parse_partition_by() 3513 order = self._parse_order() 3514 3515 measures = ( 3516 self._parse_csv(self._parse_match_recognize_measure) 3517 if self._match_text_seq("MEASURES") 3518 else None 3519 ) 3520 3521 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3522 rows = exp.var("ONE ROW PER MATCH") 3523 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3524 text = "ALL ROWS PER MATCH" 3525 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3526 text += " SHOW EMPTY MATCHES" 3527 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3528 text += " OMIT EMPTY MATCHES" 3529 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3530 text += " WITH UNMATCHED ROWS" 3531 rows = exp.var(text) 3532 else: 3533 rows = None 3534 3535 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3536 text = "AFTER MATCH SKIP" 3537 if self._match_text_seq("PAST", "LAST", "ROW"): 3538 text += " PAST LAST ROW" 3539 elif self._match_text_seq("TO", "NEXT", "ROW"): 3540 text += " TO NEXT ROW" 3541 elif self._match_text_seq("TO", "FIRST"): 3542 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3543 elif self._match_text_seq("TO", "LAST"): 3544 text += f" TO LAST {self._advance_any().text}" # type: ignore 3545 after = exp.var(text) 3546 else: 3547 after = None 3548 3549 if self._match_text_seq("PATTERN"): 3550 self._match_l_paren() 3551 3552 if not self._curr: 3553 self.raise_error("Expecting )", self._curr) 3554 3555 paren = 1 3556 start = self._curr 3557 3558 while self._curr and paren > 0: 3559 if self._curr.token_type == TokenType.L_PAREN: 3560 paren += 1 3561 if self._curr.token_type == TokenType.R_PAREN: 3562 paren -= 1 3563 3564 end = self._prev 3565 self._advance() 3566 3567 if paren > 0: 3568 self.raise_error("Expecting )", self._curr) 3569 3570 pattern = 
exp.var(self._find_sql(start, end)) 3571 else: 3572 pattern = None 3573 3574 define = ( 3575 self._parse_csv(self._parse_name_as_expression) 3576 if self._match_text_seq("DEFINE") 3577 else None 3578 ) 3579 3580 self._match_r_paren() 3581 3582 return self.expression( 3583 exp.MatchRecognize, 3584 partition_by=partition, 3585 order=order, 3586 measures=measures, 3587 rows=rows, 3588 after=after, 3589 pattern=pattern, 3590 define=define, 3591 alias=self._parse_table_alias(), 3592 ) 3593 3594 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3595 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3596 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3597 cross_apply = False 3598 3599 if cross_apply is not None: 3600 this = self._parse_select(table=True) 3601 view = None 3602 outer = None 3603 elif self._match(TokenType.LATERAL): 3604 this = self._parse_select(table=True) 3605 view = self._match(TokenType.VIEW) 3606 outer = self._match(TokenType.OUTER) 3607 else: 3608 return None 3609 3610 if not this: 3611 this = ( 3612 self._parse_unnest() 3613 or self._parse_function() 3614 or self._parse_id_var(any_token=False) 3615 ) 3616 3617 while self._match(TokenType.DOT): 3618 this = exp.Dot( 3619 this=this, 3620 expression=self._parse_function() or self._parse_id_var(any_token=False), 3621 ) 3622 3623 ordinality: t.Optional[bool] = None 3624 3625 if view: 3626 table = self._parse_id_var(any_token=False) 3627 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3628 table_alias: t.Optional[exp.TableAlias] = self.expression( 3629 exp.TableAlias, this=table, columns=columns 3630 ) 3631 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3632 # We move the alias from the lateral's child node to the lateral itself 3633 table_alias = this.args["alias"].pop() 3634 else: 3635 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3636 table_alias = self._parse_table_alias() 3637 
3638 return self.expression( 3639 exp.Lateral, 3640 this=this, 3641 view=view, 3642 outer=outer, 3643 alias=table_alias, 3644 cross_apply=cross_apply, 3645 ordinality=ordinality, 3646 ) 3647 3648 def _parse_join_parts( 3649 self, 3650 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3651 return ( 3652 self._match_set(self.JOIN_METHODS) and self._prev, 3653 self._match_set(self.JOIN_SIDES) and self._prev, 3654 self._match_set(self.JOIN_KINDS) and self._prev, 3655 ) 3656 3657 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3658 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3659 this = self._parse_column() 3660 if isinstance(this, exp.Column): 3661 return this.this 3662 return this 3663 3664 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3665 3666 def _parse_join( 3667 self, skip_join_token: bool = False, parse_bracket: bool = False 3668 ) -> t.Optional[exp.Join]: 3669 if self._match(TokenType.COMMA): 3670 table = self._try_parse(self._parse_table) 3671 if table: 3672 return self.expression(exp.Join, this=table) 3673 return None 3674 3675 index = self._index 3676 method, side, kind = self._parse_join_parts() 3677 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3678 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3679 3680 if not skip_join_token and not join: 3681 self._retreat(index) 3682 kind = None 3683 method = None 3684 side = None 3685 3686 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3687 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3688 3689 if not skip_join_token and not join and not outer_apply and not cross_apply: 3690 return None 3691 3692 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3693 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3694 kwargs["expressions"] = self._parse_csv( 
3695 lambda: self._parse_table(parse_bracket=parse_bracket) 3696 ) 3697 3698 if method: 3699 kwargs["method"] = method.text 3700 if side: 3701 kwargs["side"] = side.text 3702 if kind: 3703 kwargs["kind"] = kind.text 3704 if hint: 3705 kwargs["hint"] = hint 3706 3707 if self._match(TokenType.MATCH_CONDITION): 3708 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3709 3710 if self._match(TokenType.ON): 3711 kwargs["on"] = self._parse_assignment() 3712 elif self._match(TokenType.USING): 3713 kwargs["using"] = self._parse_using_identifiers() 3714 elif ( 3715 not (outer_apply or cross_apply) 3716 and not isinstance(kwargs["this"], exp.Unnest) 3717 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3718 ): 3719 index = self._index 3720 joins: t.Optional[list] = list(self._parse_joins()) 3721 3722 if joins and self._match(TokenType.ON): 3723 kwargs["on"] = self._parse_assignment() 3724 elif joins and self._match(TokenType.USING): 3725 kwargs["using"] = self._parse_using_identifiers() 3726 else: 3727 joins = None 3728 self._retreat(index) 3729 3730 kwargs["this"].set("joins", joins if joins else None) 3731 3732 comments = [c for token in (method, side, kind) if token for c in token.comments] 3733 return self.expression(exp.Join, comments=comments, **kwargs) 3734 3735 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3736 this = self._parse_assignment() 3737 3738 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3739 return this 3740 3741 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3742 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3743 3744 return this 3745 3746 def _parse_index_params(self) -> exp.IndexParameters: 3747 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3748 3749 if self._match(TokenType.L_PAREN, advance=False): 3750 columns = self._parse_wrapped_csv(self._parse_with_operator) 3751 else: 3752 
columns = None 3753 3754 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3755 partition_by = self._parse_partition_by() 3756 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3757 tablespace = ( 3758 self._parse_var(any_token=True) 3759 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3760 else None 3761 ) 3762 where = self._parse_where() 3763 3764 on = self._parse_field() if self._match(TokenType.ON) else None 3765 3766 return self.expression( 3767 exp.IndexParameters, 3768 using=using, 3769 columns=columns, 3770 include=include, 3771 partition_by=partition_by, 3772 where=where, 3773 with_storage=with_storage, 3774 tablespace=tablespace, 3775 on=on, 3776 ) 3777 3778 def _parse_index( 3779 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3780 ) -> t.Optional[exp.Index]: 3781 if index or anonymous: 3782 unique = None 3783 primary = None 3784 amp = None 3785 3786 self._match(TokenType.ON) 3787 self._match(TokenType.TABLE) # hive 3788 table = self._parse_table_parts(schema=True) 3789 else: 3790 unique = self._match(TokenType.UNIQUE) 3791 primary = self._match_text_seq("PRIMARY") 3792 amp = self._match_text_seq("AMP") 3793 3794 if not self._match(TokenType.INDEX): 3795 return None 3796 3797 index = self._parse_id_var() 3798 table = None 3799 3800 params = self._parse_index_params() 3801 3802 return self.expression( 3803 exp.Index, 3804 this=index, 3805 table=table, 3806 unique=unique, 3807 primary=primary, 3808 amp=amp, 3809 params=params, 3810 ) 3811 3812 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3813 hints: t.List[exp.Expression] = [] 3814 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3815 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3816 hints.append( 3817 self.expression( 3818 exp.WithTableHint, 3819 expressions=self._parse_csv( 3820 lambda: self._parse_function() or 
self._parse_var(any_token=True) 3821 ), 3822 ) 3823 ) 3824 self._match_r_paren() 3825 else: 3826 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3827 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3828 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3829 3830 self._match_set((TokenType.INDEX, TokenType.KEY)) 3831 if self._match(TokenType.FOR): 3832 hint.set("target", self._advance_any() and self._prev.text.upper()) 3833 3834 hint.set("expressions", self._parse_wrapped_id_vars()) 3835 hints.append(hint) 3836 3837 return hints or None 3838 3839 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3840 return ( 3841 (not schema and self._parse_function(optional_parens=False)) 3842 or self._parse_id_var(any_token=False) 3843 or self._parse_string_as_identifier() 3844 or self._parse_placeholder() 3845 ) 3846 3847 def _parse_table_parts( 3848 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3849 ) -> exp.Table: 3850 catalog = None 3851 db = None 3852 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3853 3854 while self._match(TokenType.DOT): 3855 if catalog: 3856 # This allows nesting the table in arbitrarily many dot expressions if needed 3857 table = self.expression( 3858 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3859 ) 3860 else: 3861 catalog = db 3862 db = table 3863 # "" used for tsql FROM a..b case 3864 table = self._parse_table_part(schema=schema) or "" 3865 3866 if ( 3867 wildcard 3868 and self._is_connected() 3869 and (isinstance(table, exp.Identifier) or not table) 3870 and self._match(TokenType.STAR) 3871 ): 3872 if isinstance(table, exp.Identifier): 3873 table.args["this"] += "*" 3874 else: 3875 table = exp.Identifier(this="*") 3876 3877 # We bubble up comments from the Identifier to the Table 3878 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3879 3880 if is_db_reference: 3881 
catalog = db 3882 db = table 3883 table = None 3884 3885 if not table and not is_db_reference: 3886 self.raise_error(f"Expected table name but got {self._curr}") 3887 if not db and is_db_reference: 3888 self.raise_error(f"Expected database name but got {self._curr}") 3889 3890 table = self.expression( 3891 exp.Table, 3892 comments=comments, 3893 this=table, 3894 db=db, 3895 catalog=catalog, 3896 ) 3897 3898 changes = self._parse_changes() 3899 if changes: 3900 table.set("changes", changes) 3901 3902 at_before = self._parse_historical_data() 3903 if at_before: 3904 table.set("when", at_before) 3905 3906 pivots = self._parse_pivots() 3907 if pivots: 3908 table.set("pivots", pivots) 3909 3910 return table 3911 3912 def _parse_table( 3913 self, 3914 schema: bool = False, 3915 joins: bool = False, 3916 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3917 parse_bracket: bool = False, 3918 is_db_reference: bool = False, 3919 parse_partition: bool = False, 3920 ) -> t.Optional[exp.Expression]: 3921 lateral = self._parse_lateral() 3922 if lateral: 3923 return lateral 3924 3925 unnest = self._parse_unnest() 3926 if unnest: 3927 return unnest 3928 3929 values = self._parse_derived_table_values() 3930 if values: 3931 return values 3932 3933 subquery = self._parse_select(table=True) 3934 if subquery: 3935 if not subquery.args.get("pivots"): 3936 subquery.set("pivots", self._parse_pivots()) 3937 return subquery 3938 3939 bracket = parse_bracket and self._parse_bracket(None) 3940 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3941 3942 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3943 self._parse_table 3944 ) 3945 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3946 3947 only = self._match(TokenType.ONLY) 3948 3949 this = t.cast( 3950 exp.Expression, 3951 bracket 3952 or rows_from 3953 or self._parse_bracket( 3954 self._parse_table_parts(schema=schema, 
is_db_reference=is_db_reference) 3955 ), 3956 ) 3957 3958 if only: 3959 this.set("only", only) 3960 3961 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3962 self._match_text_seq("*") 3963 3964 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3965 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3966 this.set("partition", self._parse_partition()) 3967 3968 if schema: 3969 return self._parse_schema(this=this) 3970 3971 version = self._parse_version() 3972 3973 if version: 3974 this.set("version", version) 3975 3976 if self.dialect.ALIAS_POST_TABLESAMPLE: 3977 this.set("sample", self._parse_table_sample()) 3978 3979 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3980 if alias: 3981 this.set("alias", alias) 3982 3983 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3984 return self.expression( 3985 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3986 ) 3987 3988 this.set("hints", self._parse_table_hints()) 3989 3990 if not this.args.get("pivots"): 3991 this.set("pivots", self._parse_pivots()) 3992 3993 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3994 this.set("sample", self._parse_table_sample()) 3995 3996 if joins: 3997 for join in self._parse_joins(): 3998 this.append("joins", join) 3999 4000 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 4001 this.set("ordinality", True) 4002 this.set("alias", self._parse_table_alias()) 4003 4004 return this 4005 4006 def _parse_version(self) -> t.Optional[exp.Version]: 4007 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4008 this = "TIMESTAMP" 4009 elif self._match(TokenType.VERSION_SNAPSHOT): 4010 this = "VERSION" 4011 else: 4012 return None 4013 4014 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4015 kind = self._prev.text.upper() 4016 start = self._parse_bitwise() 4017 self._match_texts(("TO", "AND")) 4018 end = self._parse_bitwise() 4019 
expression: t.Optional[exp.Expression] = self.expression( 4020 exp.Tuple, expressions=[start, end] 4021 ) 4022 elif self._match_text_seq("CONTAINED", "IN"): 4023 kind = "CONTAINED IN" 4024 expression = self.expression( 4025 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4026 ) 4027 elif self._match(TokenType.ALL): 4028 kind = "ALL" 4029 expression = None 4030 else: 4031 self._match_text_seq("AS", "OF") 4032 kind = "AS OF" 4033 expression = self._parse_type() 4034 4035 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 4036 4037 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 4038 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4039 index = self._index 4040 historical_data = None 4041 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4042 this = self._prev.text.upper() 4043 kind = ( 4044 self._match(TokenType.L_PAREN) 4045 and self._match_texts(self.HISTORICAL_DATA_KIND) 4046 and self._prev.text.upper() 4047 ) 4048 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4049 4050 if expression: 4051 self._match_r_paren() 4052 historical_data = self.expression( 4053 exp.HistoricalData, this=this, kind=kind, expression=expression 4054 ) 4055 else: 4056 self._retreat(index) 4057 4058 return historical_data 4059 4060 def _parse_changes(self) -> t.Optional[exp.Changes]: 4061 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4062 return None 4063 4064 information = self._parse_var(any_token=True) 4065 self._match_r_paren() 4066 4067 return self.expression( 4068 exp.Changes, 4069 information=information, 4070 at_before=self._parse_historical_data(), 4071 end=self._parse_historical_data(), 4072 ) 4073 4074 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4075 if not self._match(TokenType.UNNEST): 4076 return None 4077 4078 expressions = self._parse_wrapped_csv(self._parse_equality) 4079 offset = self._match_pair(TokenType.WITH, 
TokenType.ORDINALITY) 4080 4081 alias = self._parse_table_alias() if with_alias else None 4082 4083 if alias: 4084 if self.dialect.UNNEST_COLUMN_ONLY: 4085 if alias.args.get("columns"): 4086 self.raise_error("Unexpected extra column alias in unnest.") 4087 4088 alias.set("columns", [alias.this]) 4089 alias.set("this", None) 4090 4091 columns = alias.args.get("columns") or [] 4092 if offset and len(expressions) < len(columns): 4093 offset = columns.pop() 4094 4095 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4096 self._match(TokenType.ALIAS) 4097 offset = self._parse_id_var( 4098 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4099 ) or exp.to_identifier("offset") 4100 4101 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4102 4103 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4104 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4105 if not is_derived and not ( 4106 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4107 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4108 ): 4109 return None 4110 4111 expressions = self._parse_csv(self._parse_value) 4112 alias = self._parse_table_alias() 4113 4114 if is_derived: 4115 self._match_r_paren() 4116 4117 return self.expression( 4118 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4119 ) 4120 4121 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4122 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4123 as_modifier and self._match_text_seq("USING", "SAMPLE") 4124 ): 4125 return None 4126 4127 bucket_numerator = None 4128 bucket_denominator = None 4129 bucket_field = None 4130 percent = None 4131 size = None 4132 seed = None 4133 4134 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4135 matched_l_paren = self._match(TokenType.L_PAREN) 4136 4137 if self.TABLESAMPLE_CSV: 4138 num = None 4139 
expressions = self._parse_csv(self._parse_primary) 4140 else: 4141 expressions = None 4142 num = ( 4143 self._parse_factor() 4144 if self._match(TokenType.NUMBER, advance=False) 4145 else self._parse_primary() or self._parse_placeholder() 4146 ) 4147 4148 if self._match_text_seq("BUCKET"): 4149 bucket_numerator = self._parse_number() 4150 self._match_text_seq("OUT", "OF") 4151 bucket_denominator = bucket_denominator = self._parse_number() 4152 self._match(TokenType.ON) 4153 bucket_field = self._parse_field() 4154 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4155 percent = num 4156 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4157 size = num 4158 else: 4159 percent = num 4160 4161 if matched_l_paren: 4162 self._match_r_paren() 4163 4164 if self._match(TokenType.L_PAREN): 4165 method = self._parse_var(upper=True) 4166 seed = self._match(TokenType.COMMA) and self._parse_number() 4167 self._match_r_paren() 4168 elif self._match_texts(("SEED", "REPEATABLE")): 4169 seed = self._parse_wrapped(self._parse_number) 4170 4171 if not method and self.DEFAULT_SAMPLING_METHOD: 4172 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4173 4174 return self.expression( 4175 exp.TableSample, 4176 expressions=expressions, 4177 method=method, 4178 bucket_numerator=bucket_numerator, 4179 bucket_denominator=bucket_denominator, 4180 bucket_field=bucket_field, 4181 percent=percent, 4182 size=size, 4183 seed=seed, 4184 ) 4185 4186 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4187 return list(iter(self._parse_pivot, None)) or None 4188 4189 def _parse_joins(self) -> t.Iterator[exp.Join]: 4190 return iter(self._parse_join, None) 4191 4192 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4193 if not self._match(TokenType.INTO): 4194 return None 4195 4196 return self.expression( 4197 exp.UnpivotColumns, 4198 this=self._match_text_seq("NAME") and self._parse_column(), 4199 expressions=self._match_text_seq("VALUE") and 
self._parse_csv(self._parse_column), 4200 ) 4201 4202 # https://duckdb.org/docs/sql/statements/pivot 4203 def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4204 def _parse_on() -> t.Optional[exp.Expression]: 4205 this = self._parse_bitwise() 4206 4207 if self._match(TokenType.IN): 4208 # PIVOT ... ON col IN (row_val1, row_val2) 4209 return self._parse_in(this) 4210 if self._match(TokenType.ALIAS, advance=False): 4211 # UNPIVOT ... ON (col1, col2, col3) AS row_val 4212 return self._parse_alias(this) 4213 4214 return this 4215 4216 this = self._parse_table() 4217 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4218 into = self._parse_unpivot_columns() 4219 using = self._match(TokenType.USING) and self._parse_csv( 4220 lambda: self._parse_alias(self._parse_function()) 4221 ) 4222 group = self._parse_group() 4223 4224 return self.expression( 4225 exp.Pivot, 4226 this=this, 4227 expressions=expressions, 4228 using=using, 4229 group=group, 4230 unpivot=is_unpivot, 4231 into=into, 4232 ) 4233 4234 def _parse_pivot_in(self) -> exp.In: 4235 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4236 this = self._parse_select_or_expression() 4237 4238 self._match(TokenType.ALIAS) 4239 alias = self._parse_bitwise() 4240 if alias: 4241 if isinstance(alias, exp.Column) and not alias.db: 4242 alias = alias.this 4243 return self.expression(exp.PivotAlias, this=this, alias=alias) 4244 4245 return this 4246 4247 value = self._parse_column() 4248 4249 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4250 self.raise_error("Expecting IN (") 4251 4252 if self._match(TokenType.ANY): 4253 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4254 else: 4255 exprs = self._parse_csv(_parse_aliased_expression) 4256 4257 self._match_r_paren() 4258 return self.expression(exp.In, this=value, expressions=exprs) 4259 4260 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4261 index = 
self._index 4262 include_nulls = None 4263 4264 if self._match(TokenType.PIVOT): 4265 unpivot = False 4266 elif self._match(TokenType.UNPIVOT): 4267 unpivot = True 4268 4269 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4270 if self._match_text_seq("INCLUDE", "NULLS"): 4271 include_nulls = True 4272 elif self._match_text_seq("EXCLUDE", "NULLS"): 4273 include_nulls = False 4274 else: 4275 return None 4276 4277 expressions = [] 4278 4279 if not self._match(TokenType.L_PAREN): 4280 self._retreat(index) 4281 return None 4282 4283 if unpivot: 4284 expressions = self._parse_csv(self._parse_column) 4285 else: 4286 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 4287 4288 if not expressions: 4289 self.raise_error("Failed to parse PIVOT's aggregation list") 4290 4291 if not self._match(TokenType.FOR): 4292 self.raise_error("Expecting FOR") 4293 4294 fields = [] 4295 while True: 4296 field = self._try_parse(self._parse_pivot_in) 4297 if not field: 4298 break 4299 fields.append(field) 4300 4301 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4302 self._parse_bitwise 4303 ) 4304 4305 group = self._parse_group() 4306 4307 self._match_r_paren() 4308 4309 pivot = self.expression( 4310 exp.Pivot, 4311 expressions=expressions, 4312 fields=fields, 4313 unpivot=unpivot, 4314 include_nulls=include_nulls, 4315 default_on_null=default_on_null, 4316 group=group, 4317 ) 4318 4319 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4320 pivot.set("alias", self._parse_table_alias()) 4321 4322 if not unpivot: 4323 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4324 4325 columns: t.List[exp.Expression] = [] 4326 all_fields = [] 4327 for pivot_field in pivot.fields: 4328 pivot_field_expressions = pivot_field.expressions 4329 4330 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in 
this case. 4331 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4332 continue 4333 4334 all_fields.append( 4335 [ 4336 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4337 for fld in pivot_field_expressions 4338 ] 4339 ) 4340 4341 if all_fields: 4342 if names: 4343 all_fields.append(names) 4344 4345 # Generate all possible combinations of the pivot columns 4346 # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 4347 # generates the product between [[2000, 2010], ['NL', 'US'], ['total']] 4348 for fld_parts_tuple in itertools.product(*all_fields): 4349 fld_parts = list(fld_parts_tuple) 4350 4351 if names and self.PREFIXED_PIVOT_COLUMNS: 4352 # Move the "name" to the front of the list 4353 fld_parts.insert(0, fld_parts.pop(-1)) 4354 4355 columns.append(exp.to_identifier("_".join(fld_parts))) 4356 4357 pivot.set("columns", columns) 4358 4359 return pivot 4360 4361 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4362 return [agg.alias for agg in aggregations if agg.alias] 4363 4364 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4365 if not skip_where_token and not self._match(TokenType.PREWHERE): 4366 return None 4367 4368 return self.expression( 4369 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4370 ) 4371 4372 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4373 if not skip_where_token and not self._match(TokenType.WHERE): 4374 return None 4375 4376 return self.expression( 4377 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4378 ) 4379 4380 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4381 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4382 return None 4383 4384 elements: t.Dict[str, t.Any] = defaultdict(list) 4385 4386 if self._match(TokenType.ALL): 4387 elements["all"] = True 4388 
elif self._match(TokenType.DISTINCT): 4389 elements["all"] = False 4390 4391 while True: 4392 index = self._index 4393 4394 elements["expressions"].extend( 4395 self._parse_csv( 4396 lambda: None 4397 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4398 else self._parse_assignment() 4399 ) 4400 ) 4401 4402 before_with_index = self._index 4403 with_prefix = self._match(TokenType.WITH) 4404 4405 if self._match(TokenType.ROLLUP): 4406 elements["rollup"].append( 4407 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4408 ) 4409 elif self._match(TokenType.CUBE): 4410 elements["cube"].append( 4411 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4412 ) 4413 elif self._match(TokenType.GROUPING_SETS): 4414 elements["grouping_sets"].append( 4415 self.expression( 4416 exp.GroupingSets, 4417 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4418 ) 4419 ) 4420 elif self._match_text_seq("TOTALS"): 4421 elements["totals"] = True # type: ignore 4422 4423 if before_with_index <= self._index <= before_with_index + 1: 4424 self._retreat(before_with_index) 4425 break 4426 4427 if index == self._index: 4428 break 4429 4430 return self.expression(exp.Group, **elements) # type: ignore 4431 4432 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4433 return self.expression( 4434 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4435 ) 4436 4437 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4438 if self._match(TokenType.L_PAREN): 4439 grouping_set = self._parse_csv(self._parse_column) 4440 self._match_r_paren() 4441 return self.expression(exp.Tuple, expressions=grouping_set) 4442 4443 return self._parse_column() 4444 4445 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4446 if not skip_having_token and not self._match(TokenType.HAVING): 4447 return None 4448 return self.expression(exp.Having, 
this=self._parse_assignment()) 4449 4450 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4451 if not self._match(TokenType.QUALIFY): 4452 return None 4453 return self.expression(exp.Qualify, this=self._parse_assignment()) 4454 4455 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4456 if skip_start_token: 4457 start = None 4458 elif self._match(TokenType.START_WITH): 4459 start = self._parse_assignment() 4460 else: 4461 return None 4462 4463 self._match(TokenType.CONNECT_BY) 4464 nocycle = self._match_text_seq("NOCYCLE") 4465 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4466 exp.Prior, this=self._parse_bitwise() 4467 ) 4468 connect = self._parse_assignment() 4469 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4470 4471 if not start and self._match(TokenType.START_WITH): 4472 start = self._parse_assignment() 4473 4474 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4475 4476 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4477 this = self._parse_id_var(any_token=True) 4478 if self._match(TokenType.ALIAS): 4479 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4480 return this 4481 4482 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4483 if self._match_text_seq("INTERPOLATE"): 4484 return self._parse_wrapped_csv(self._parse_name_as_expression) 4485 return None 4486 4487 def _parse_order( 4488 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4489 ) -> t.Optional[exp.Expression]: 4490 siblings = None 4491 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4492 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4493 return this 4494 4495 siblings = True 4496 4497 return self.expression( 4498 exp.Order, 4499 this=this, 4500 expressions=self._parse_csv(self._parse_ordered), 4501 siblings=siblings, 4502 ) 4503 4504 def _parse_sort(self, exp_class: t.Type[E], token: 
TokenType) -> t.Optional[E]: 4505 if not self._match(token): 4506 return None 4507 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4508 4509 def _parse_ordered( 4510 self, parse_method: t.Optional[t.Callable] = None 4511 ) -> t.Optional[exp.Ordered]: 4512 this = parse_method() if parse_method else self._parse_assignment() 4513 if not this: 4514 return None 4515 4516 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4517 this = exp.var("ALL") 4518 4519 asc = self._match(TokenType.ASC) 4520 desc = self._match(TokenType.DESC) or (asc and False) 4521 4522 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4523 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4524 4525 nulls_first = is_nulls_first or False 4526 explicitly_null_ordered = is_nulls_first or is_nulls_last 4527 4528 if ( 4529 not explicitly_null_ordered 4530 and ( 4531 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4532 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4533 ) 4534 and self.dialect.NULL_ORDERING != "nulls_are_last" 4535 ): 4536 nulls_first = True 4537 4538 if self._match_text_seq("WITH", "FILL"): 4539 with_fill = self.expression( 4540 exp.WithFill, 4541 **{ # type: ignore 4542 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4543 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4544 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4545 "interpolate": self._parse_interpolate(), 4546 }, 4547 ) 4548 else: 4549 with_fill = None 4550 4551 return self.expression( 4552 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4553 ) 4554 4555 def _parse_limit_options(self) -> exp.LimitOptions: 4556 percent = self._match(TokenType.PERCENT) 4557 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4558 self._match_text_seq("ONLY") 4559 with_ties = self._match_text_seq("WITH", "TIES") 4560 return self.expression(exp.LimitOptions, percent=percent, 
rows=rows, with_ties=with_ties) 4561 4562 def _parse_limit( 4563 self, 4564 this: t.Optional[exp.Expression] = None, 4565 top: bool = False, 4566 skip_limit_token: bool = False, 4567 ) -> t.Optional[exp.Expression]: 4568 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4569 comments = self._prev_comments 4570 if top: 4571 limit_paren = self._match(TokenType.L_PAREN) 4572 expression = self._parse_term() if limit_paren else self._parse_number() 4573 4574 if limit_paren: 4575 self._match_r_paren() 4576 4577 limit_options = self._parse_limit_options() 4578 else: 4579 limit_options = None 4580 expression = self._parse_term() 4581 4582 if self._match(TokenType.COMMA): 4583 offset = expression 4584 expression = self._parse_term() 4585 else: 4586 offset = None 4587 4588 limit_exp = self.expression( 4589 exp.Limit, 4590 this=this, 4591 expression=expression, 4592 offset=offset, 4593 comments=comments, 4594 limit_options=limit_options, 4595 expressions=self._parse_limit_by(), 4596 ) 4597 4598 return limit_exp 4599 4600 if self._match(TokenType.FETCH): 4601 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4602 direction = self._prev.text.upper() if direction else "FIRST" 4603 4604 count = self._parse_field(tokens=self.FETCH_TOKENS) 4605 4606 return self.expression( 4607 exp.Fetch, 4608 direction=direction, 4609 count=count, 4610 limit_options=self._parse_limit_options(), 4611 ) 4612 4613 return this 4614 4615 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4616 if not self._match(TokenType.OFFSET): 4617 return this 4618 4619 count = self._parse_term() 4620 self._match_set((TokenType.ROW, TokenType.ROWS)) 4621 4622 return self.expression( 4623 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4624 ) 4625 4626 def _can_parse_limit_or_offset(self) -> bool: 4627 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4628 return False 4629 4630 index = 
self._index 4631 result = bool( 4632 self._try_parse(self._parse_limit, retreat=True) 4633 or self._try_parse(self._parse_offset, retreat=True) 4634 ) 4635 self._retreat(index) 4636 return result 4637 4638 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4639 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4640 4641 def _parse_locks(self) -> t.List[exp.Lock]: 4642 locks = [] 4643 while True: 4644 if self._match_text_seq("FOR", "UPDATE"): 4645 update = True 4646 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4647 "LOCK", "IN", "SHARE", "MODE" 4648 ): 4649 update = False 4650 else: 4651 break 4652 4653 expressions = None 4654 if self._match_text_seq("OF"): 4655 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4656 4657 wait: t.Optional[bool | exp.Expression] = None 4658 if self._match_text_seq("NOWAIT"): 4659 wait = True 4660 elif self._match_text_seq("WAIT"): 4661 wait = self._parse_primary() 4662 elif self._match_text_seq("SKIP", "LOCKED"): 4663 wait = False 4664 4665 locks.append( 4666 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4667 ) 4668 4669 return locks 4670 4671 def parse_set_operation(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4672 start = self._index 4673 _, side_token, kind_token = self._parse_join_parts() 4674 4675 side = side_token.text if side_token else None 4676 kind = kind_token.text if kind_token else None 4677 4678 if not self._match_set(self.SET_OPERATIONS): 4679 self._retreat(start) 4680 return None 4681 4682 token_type = self._prev.token_type 4683 4684 if token_type == TokenType.UNION: 4685 operation: t.Type[exp.SetOperation] = exp.Union 4686 elif token_type == TokenType.EXCEPT: 4687 operation = exp.Except 4688 else: 4689 operation = exp.Intersect 4690 4691 comments = self._prev.comments 4692 4693 if self._match(TokenType.DISTINCT): 4694 distinct: t.Optional[bool] = True 4695 elif 
self._match(TokenType.ALL): 4696 distinct = False 4697 else: 4698 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4699 if distinct is None: 4700 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4701 4702 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4703 "STRICT", "CORRESPONDING" 4704 ) 4705 if self._match_text_seq("CORRESPONDING"): 4706 by_name = True 4707 if not side and not kind: 4708 kind = "INNER" 4709 4710 on_column_list = None 4711 if by_name and self._match_texts(("ON", "BY")): 4712 on_column_list = self._parse_wrapped_csv(self._parse_column) 4713 4714 expression = self._parse_select(nested=True, parse_set_operation=False) 4715 4716 return self.expression( 4717 operation, 4718 comments=comments, 4719 this=this, 4720 distinct=distinct, 4721 by_name=by_name, 4722 expression=expression, 4723 side=side, 4724 kind=kind, 4725 on=on_column_list, 4726 ) 4727 4728 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4729 while True: 4730 setop = self.parse_set_operation(this) 4731 if not setop: 4732 break 4733 this = setop 4734 4735 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4736 expression = this.expression 4737 4738 if expression: 4739 for arg in self.SET_OP_MODIFIERS: 4740 expr = expression.args.get(arg) 4741 if expr: 4742 this.set(arg, expr.pop()) 4743 4744 return this 4745 4746 def _parse_expression(self) -> t.Optional[exp.Expression]: 4747 return self._parse_alias(self._parse_assignment()) 4748 4749 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4750 this = self._parse_disjunction() 4751 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4752 # This allows us to parse <non-identifier token> := <expr> 4753 this = exp.column( 4754 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4755 ) 4756 4757 while self._match_set(self.ASSIGNMENT): 4758 if isinstance(this, 
exp.Column) and len(this.parts) == 1: 4759 this = this.this 4760 4761 this = self.expression( 4762 self.ASSIGNMENT[self._prev.token_type], 4763 this=this, 4764 comments=self._prev_comments, 4765 expression=self._parse_assignment(), 4766 ) 4767 4768 return this 4769 4770 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4771 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4772 4773 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4774 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4775 4776 def _parse_equality(self) -> t.Optional[exp.Expression]: 4777 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4778 4779 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4780 return self._parse_tokens(self._parse_range, self.COMPARISON) 4781 4782 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4783 this = this or self._parse_bitwise() 4784 negate = self._match(TokenType.NOT) 4785 4786 if self._match_set(self.RANGE_PARSERS): 4787 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4788 if not expression: 4789 return this 4790 4791 this = expression 4792 elif self._match(TokenType.ISNULL): 4793 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4794 4795 # Postgres supports ISNULL and NOTNULL for conditions. 
4796 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4797 if self._match(TokenType.NOTNULL): 4798 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4799 this = self.expression(exp.Not, this=this) 4800 4801 if negate: 4802 this = self._negate_range(this) 4803 4804 if self._match(TokenType.IS): 4805 this = self._parse_is(this) 4806 4807 return this 4808 4809 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4810 if not this: 4811 return this 4812 4813 return self.expression(exp.Not, this=this) 4814 4815 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4816 index = self._index - 1 4817 negate = self._match(TokenType.NOT) 4818 4819 if self._match_text_seq("DISTINCT", "FROM"): 4820 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4821 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4822 4823 if self._match(TokenType.JSON): 4824 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4825 4826 if self._match_text_seq("WITH"): 4827 _with = True 4828 elif self._match_text_seq("WITHOUT"): 4829 _with = False 4830 else: 4831 _with = None 4832 4833 unique = self._match(TokenType.UNIQUE) 4834 self._match_text_seq("KEYS") 4835 expression: t.Optional[exp.Expression] = self.expression( 4836 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4837 ) 4838 else: 4839 expression = self._parse_primary() or self._parse_null() 4840 if not expression: 4841 self._retreat(index) 4842 return None 4843 4844 this = self.expression(exp.Is, this=this, expression=expression) 4845 return self.expression(exp.Not, this=this) if negate else this 4846 4847 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4848 unnest = self._parse_unnest(with_alias=False) 4849 if unnest: 4850 this = self.expression(exp.In, this=this, unnest=unnest) 4851 elif self._match_set((TokenType.L_PAREN, 
TokenType.L_BRACKET)): 4852 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4853 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4854 4855 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4856 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4857 else: 4858 this = self.expression(exp.In, this=this, expressions=expressions) 4859 4860 if matched_l_paren: 4861 self._match_r_paren(this) 4862 elif not self._match(TokenType.R_BRACKET, expression=this): 4863 self.raise_error("Expecting ]") 4864 else: 4865 this = self.expression(exp.In, this=this, field=self._parse_column()) 4866 4867 return this 4868 4869 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4870 low = self._parse_bitwise() 4871 self._match(TokenType.AND) 4872 high = self._parse_bitwise() 4873 return self.expression(exp.Between, this=this, low=low, high=high) 4874 4875 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4876 if not self._match(TokenType.ESCAPE): 4877 return this 4878 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4879 4880 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4881 index = self._index 4882 4883 if not self._match(TokenType.INTERVAL) and match_interval: 4884 return None 4885 4886 if self._match(TokenType.STRING, advance=False): 4887 this = self._parse_primary() 4888 else: 4889 this = self._parse_term() 4890 4891 if not this or ( 4892 isinstance(this, exp.Column) 4893 and not this.table 4894 and not this.this.quoted 4895 and this.name.upper() == "IS" 4896 ): 4897 self._retreat(index) 4898 return None 4899 4900 unit = self._parse_function() or ( 4901 not self._match(TokenType.ALIAS, advance=False) 4902 and self._parse_var(any_token=True, upper=True) 4903 ) 4904 4905 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 
4906 # each INTERVAL expression into this canonical form so it's easy to transpile 4907 if this and this.is_number: 4908 this = exp.Literal.string(this.to_py()) 4909 elif this and this.is_string: 4910 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4911 if parts and unit: 4912 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4913 unit = None 4914 self._retreat(self._index - 1) 4915 4916 if len(parts) == 1: 4917 this = exp.Literal.string(parts[0][0]) 4918 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4919 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4920 unit = self.expression( 4921 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4922 ) 4923 4924 interval = self.expression(exp.Interval, this=this, unit=unit) 4925 4926 index = self._index 4927 self._match(TokenType.PLUS) 4928 4929 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4930 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4931 return self.expression( 4932 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4933 ) 4934 4935 self._retreat(index) 4936 return interval 4937 4938 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4939 this = self._parse_term() 4940 4941 while True: 4942 if self._match_set(self.BITWISE): 4943 this = self.expression( 4944 self.BITWISE[self._prev.token_type], 4945 this=this, 4946 expression=self._parse_term(), 4947 ) 4948 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4949 this = self.expression( 4950 exp.DPipe, 4951 this=this, 4952 expression=self._parse_term(), 4953 safe=not self.dialect.STRICT_STRING_CONCAT, 4954 ) 4955 elif self._match(TokenType.DQMARK): 4956 this = self.expression( 4957 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4958 ) 4959 elif self._match_pair(TokenType.LT, TokenType.LT): 4960 this = self.expression( 4961 exp.BitwiseLeftShift, 
this=this, expression=self._parse_term() 4962 ) 4963 elif self._match_pair(TokenType.GT, TokenType.GT): 4964 this = self.expression( 4965 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4966 ) 4967 else: 4968 break 4969 4970 return this 4971 4972 def _parse_term(self) -> t.Optional[exp.Expression]: 4973 this = self._parse_factor() 4974 4975 while self._match_set(self.TERM): 4976 klass = self.TERM[self._prev.token_type] 4977 comments = self._prev_comments 4978 expression = self._parse_factor() 4979 4980 this = self.expression(klass, this=this, comments=comments, expression=expression) 4981 4982 if isinstance(this, exp.Collate): 4983 expr = this.expression 4984 4985 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 4986 # fallback to Identifier / Var 4987 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4988 ident = expr.this 4989 if isinstance(ident, exp.Identifier): 4990 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4991 4992 return this 4993 4994 def _parse_factor(self) -> t.Optional[exp.Expression]: 4995 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4996 this = parse_method() 4997 4998 while self._match_set(self.FACTOR): 4999 klass = self.FACTOR[self._prev.token_type] 5000 comments = self._prev_comments 5001 expression = parse_method() 5002 5003 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 5004 self._retreat(self._index - 1) 5005 return this 5006 5007 this = self.expression(klass, this=this, comments=comments, expression=expression) 5008 5009 if isinstance(this, exp.Div): 5010 this.args["typed"] = self.dialect.TYPED_DIVISION 5011 this.args["safe"] = self.dialect.SAFE_DIVISION 5012 5013 return this 5014 5015 def _parse_exponent(self) -> t.Optional[exp.Expression]: 5016 return self._parse_tokens(self._parse_unary, self.EXPONENT) 5017 5018 def _parse_unary(self) -> t.Optional[exp.Expression]: 5019 if 
self._match_set(self.UNARY_PARSERS): 5020 return self.UNARY_PARSERS[self._prev.token_type](self) 5021 return self._parse_at_time_zone(self._parse_type()) 5022 5023 def _parse_type( 5024 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5025 ) -> t.Optional[exp.Expression]: 5026 interval = parse_interval and self._parse_interval() 5027 if interval: 5028 return interval 5029 5030 index = self._index 5031 data_type = self._parse_types(check_func=True, allow_identifiers=False) 5032 5033 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 5034 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 5035 if isinstance(data_type, exp.Cast): 5036 # This constructor can contain ops directly after it, for instance struct unnesting: 5037 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).* 5038 return self._parse_column_ops(data_type) 5039 5040 if data_type: 5041 index2 = self._index 5042 this = self._parse_primary() 5043 5044 if isinstance(this, exp.Literal): 5045 this = self._parse_column_ops(this) 5046 5047 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 5048 if parser: 5049 return parser(self, this, data_type) 5050 5051 return self.expression(exp.Cast, this=this, to=data_type) 5052 5053 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 5054 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 5055 # 5056 # If the index difference here is greater than 1, that means the parser itself must have 5057 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 5058 # 5059 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 5060 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 5061 # callable in the TYPE_CONVERTERS mapping. 
For example, Snowflake converts DECIMAL to 5062 # DECIMAL(38, 0)) in order to facilitate the data type's transpilation. 5063 # 5064 # In these cases, we don't really want to return the converted type, but instead retreat 5065 # and try to parse a Column or Identifier in the section below. 5066 if data_type.expressions and index2 - index > 1: 5067 self._retreat(index2) 5068 return self._parse_column_ops(data_type) 5069 5070 self._retreat(index) 5071 5072 if fallback_to_identifier: 5073 return self._parse_id_var() 5074 5075 this = self._parse_column() 5076 return this and self._parse_column_ops(this) 5077 5078 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 5079 this = self._parse_type() 5080 if not this: 5081 return None 5082 5083 if isinstance(this, exp.Column) and not this.table: 5084 this = exp.var(this.name.upper()) 5085 5086 return self.expression( 5087 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 5088 ) 5089 5090 def _parse_types( 5091 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 5092 ) -> t.Optional[exp.Expression]: 5093 index = self._index 5094 5095 this: t.Optional[exp.Expression] = None 5096 prefix = self._match_text_seq("SYSUDTLIB", ".") 5097 5098 if not self._match_set(self.TYPE_TOKENS): 5099 identifier = allow_identifiers and self._parse_id_var( 5100 any_token=False, tokens=(TokenType.VAR,) 5101 ) 5102 if isinstance(identifier, exp.Identifier): 5103 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 5104 5105 if len(tokens) != 1: 5106 self.raise_error("Unexpected identifier", self._prev) 5107 5108 if tokens[0].token_type in self.TYPE_TOKENS: 5109 self._prev = tokens[0] 5110 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 5111 type_name = identifier.name 5112 5113 while self._match(TokenType.DOT): 5114 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5115 5116 this = exp.DataType.build(type_name, udt=True) 5117 else: 5118 
self._retreat(self._index - 1) 5119 return None 5120 else: 5121 return None 5122 5123 type_token = self._prev.token_type 5124 5125 if type_token == TokenType.PSEUDO_TYPE: 5126 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5127 5128 if type_token == TokenType.OBJECT_IDENTIFIER: 5129 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5130 5131 # https://materialize.com/docs/sql/types/map/ 5132 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5133 key_type = self._parse_types( 5134 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5135 ) 5136 if not self._match(TokenType.FARROW): 5137 self._retreat(index) 5138 return None 5139 5140 value_type = self._parse_types( 5141 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5142 ) 5143 if not self._match(TokenType.R_BRACKET): 5144 self._retreat(index) 5145 return None 5146 5147 return exp.DataType( 5148 this=exp.DataType.Type.MAP, 5149 expressions=[key_type, value_type], 5150 nested=True, 5151 prefix=prefix, 5152 ) 5153 5154 nested = type_token in self.NESTED_TYPE_TOKENS 5155 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5156 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5157 expressions = None 5158 maybe_func = False 5159 5160 if self._match(TokenType.L_PAREN): 5161 if is_struct: 5162 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5163 elif nested: 5164 expressions = self._parse_csv( 5165 lambda: self._parse_types( 5166 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5167 ) 5168 ) 5169 if type_token == TokenType.NULLABLE and len(expressions) == 1: 5170 this = expressions[0] 5171 this.set("nullable", True) 5172 self._match_r_paren() 5173 return this 5174 elif type_token in self.ENUM_TYPE_TOKENS: 5175 expressions = self._parse_csv(self._parse_equality) 5176 elif is_aggregate: 5177 func_or_ident = self._parse_function(anonymous=True) or 
self._parse_id_var( 5178 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5179 ) 5180 if not func_or_ident: 5181 return None 5182 expressions = [func_or_ident] 5183 if self._match(TokenType.COMMA): 5184 expressions.extend( 5185 self._parse_csv( 5186 lambda: self._parse_types( 5187 check_func=check_func, 5188 schema=schema, 5189 allow_identifiers=allow_identifiers, 5190 ) 5191 ) 5192 ) 5193 else: 5194 expressions = self._parse_csv(self._parse_type_size) 5195 5196 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5197 if type_token == TokenType.VECTOR and len(expressions) == 2: 5198 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 5199 5200 if not expressions or not self._match(TokenType.R_PAREN): 5201 self._retreat(index) 5202 return None 5203 5204 maybe_func = True 5205 5206 values: t.Optional[t.List[exp.Expression]] = None 5207 5208 if nested and self._match(TokenType.LT): 5209 if is_struct: 5210 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5211 else: 5212 expressions = self._parse_csv( 5213 lambda: self._parse_types( 5214 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5215 ) 5216 ) 5217 5218 if not self._match(TokenType.GT): 5219 self.raise_error("Expecting >") 5220 5221 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5222 values = self._parse_csv(self._parse_assignment) 5223 if not values and is_struct: 5224 values = None 5225 self._retreat(self._index - 1) 5226 else: 5227 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5228 5229 if type_token in self.TIMESTAMPS: 5230 if self._match_text_seq("WITH", "TIME", "ZONE"): 5231 maybe_func = False 5232 tz_type = ( 5233 exp.DataType.Type.TIMETZ 5234 if type_token in self.TIMES 5235 else exp.DataType.Type.TIMESTAMPTZ 5236 ) 5237 this = exp.DataType(this=tz_type, expressions=expressions) 5238 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5239 maybe_func = False 5240 this 
= exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5241 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5242 maybe_func = False 5243 elif type_token == TokenType.INTERVAL: 5244 unit = self._parse_var(upper=True) 5245 if unit: 5246 if self._match_text_seq("TO"): 5247 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5248 5249 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5250 else: 5251 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5252 elif type_token == TokenType.VOID: 5253 this = exp.DataType(this=exp.DataType.Type.NULL) 5254 5255 if maybe_func and check_func: 5256 index2 = self._index 5257 peek = self._parse_string() 5258 5259 if not peek: 5260 self._retreat(index) 5261 return None 5262 5263 self._retreat(index2) 5264 5265 if not this: 5266 if self._match_text_seq("UNSIGNED"): 5267 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5268 if not unsigned_type_token: 5269 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5270 5271 type_token = unsigned_type_token or type_token 5272 5273 this = exp.DataType( 5274 this=exp.DataType.Type[type_token.value], 5275 expressions=expressions, 5276 nested=nested, 5277 prefix=prefix, 5278 ) 5279 5280 # Empty arrays/structs are allowed 5281 if values is not None: 5282 cls = exp.Struct if is_struct else exp.Array 5283 this = exp.cast(cls(expressions=values), this, copy=False) 5284 5285 elif expressions: 5286 this.set("expressions", expressions) 5287 5288 # https://materialize.com/docs/sql/types/list/#type-name 5289 while self._match(TokenType.LIST): 5290 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5291 5292 index = self._index 5293 5294 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5295 matched_array = self._match(TokenType.ARRAY) 5296 5297 while self._curr: 5298 datatype_token = self._prev.token_type 5299 
matched_l_bracket = self._match(TokenType.L_BRACKET) 5300 5301 if (not matched_l_bracket and not matched_array) or ( 5302 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5303 ): 5304 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5305 # not to be confused with the fixed size array parsing 5306 break 5307 5308 matched_array = False 5309 values = self._parse_csv(self._parse_assignment) or None 5310 if ( 5311 values 5312 and not schema 5313 and ( 5314 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5315 ) 5316 ): 5317 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5318 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5319 self._retreat(index) 5320 break 5321 5322 this = exp.DataType( 5323 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5324 ) 5325 self._match(TokenType.R_BRACKET) 5326 5327 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5328 converter = self.TYPE_CONVERTERS.get(this.this) 5329 if converter: 5330 this = converter(t.cast(exp.DataType, this)) 5331 5332 return this 5333 5334 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5335 index = self._index 5336 5337 if ( 5338 self._curr 5339 and self._next 5340 and self._curr.token_type in self.TYPE_TOKENS 5341 and self._next.token_type in self.TYPE_TOKENS 5342 ): 5343 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5344 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5345 this = self._parse_id_var() 5346 else: 5347 this = ( 5348 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5349 or self._parse_id_var() 5350 ) 5351 5352 self._match(TokenType.COLON) 5353 5354 if ( 5355 type_required 5356 and not isinstance(this, exp.DataType) 5357 and not self._match_set(self.TYPE_TOKENS, advance=False) 5358 ): 5359 self._retreat(index) 5360 return self._parse_types() 5361 5362 return self._parse_column_def(this) 5363 5364 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5365 if not self._match_text_seq("AT", "TIME", "ZONE"): 5366 return this 5367 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5368 5369 def _parse_column(self) -> t.Optional[exp.Expression]: 5370 this = self._parse_column_reference() 5371 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5372 5373 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5374 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5375 5376 return column 5377 5378 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5379 this = self._parse_field() 5380 if ( 5381 not this 5382 and self._match(TokenType.VALUES, advance=False) 5383 and self.VALUES_FOLLOWED_BY_PAREN 5384 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5385 ): 5386 this = self._parse_id_var() 5387 5388 if isinstance(this, exp.Identifier): 5389 # We bubble up comments from the Identifier to the Column 5390 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5391 5392 return this 5393 5394 def _parse_colon_as_variant_extract( 5395 self, this: t.Optional[exp.Expression] 5396 ) -> t.Optional[exp.Expression]: 5397 casts = [] 5398 json_path = [] 5399 escape = None 5400 5401 while self._match(TokenType.COLON): 5402 start_index = self._index 5403 5404 # Snowflake allows reserved keywords as 
json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5405 path = self._parse_column_ops( 5406 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5407 ) 5408 5409 # The cast :: operator has a lower precedence than the extraction operator :, so 5410 # we rearrange the AST appropriately to avoid casting the JSON path 5411 while isinstance(path, exp.Cast): 5412 casts.append(path.to) 5413 path = path.this 5414 5415 if casts: 5416 dcolon_offset = next( 5417 i 5418 for i, t in enumerate(self._tokens[start_index:]) 5419 if t.token_type == TokenType.DCOLON 5420 ) 5421 end_token = self._tokens[start_index + dcolon_offset - 1] 5422 else: 5423 end_token = self._prev 5424 5425 if path: 5426 # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as 5427 # it'll roundtrip to a string literal in GET_PATH 5428 if isinstance(path, exp.Identifier) and path.quoted: 5429 escape = True 5430 5431 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5432 5433 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5434 # Databricks transforms it back to the colon/dot notation 5435 if json_path: 5436 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5437 5438 if json_path_expr: 5439 json_path_expr.set("escape", escape) 5440 5441 this = self.expression( 5442 exp.JSONExtract, 5443 this=this, 5444 expression=json_path_expr, 5445 variant_extract=True, 5446 ) 5447 5448 while casts: 5449 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5450 5451 return this 5452 5453 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5454 return self._parse_types() 5455 5456 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5457 this = self._parse_bracket(this) 5458 5459 while self._match_set(self.COLUMN_OPERATORS): 5460 op_token = self._prev.token_type 5461 op = 
def _parse_primary(self) -> t.Optional[exp.Expression]:
    """
    Parses a primary expression.

    Three forms are recognized, in this order:
      1. a token handled by `PRIMARY_PARSERS` (adjacent string tokens are merged
         into a single `exp.Concat`),
      2. a `.<number>` pair, which becomes the numeric literal `0.<number>`,
      3. a parenthesized query, tuple or expression.

    Returns:
        The parsed expression, or None if the current token starts none of these forms.
    """
    if self._match_set(self.PRIMARY_PARSERS):
        kind = self._prev.token_type
        parsed = self.PRIMARY_PARSERS[kind](self, self._prev)

        if kind != TokenType.STRING:
            return parsed

        # Consecutive string tokens are collected so they can be concatenated
        parts = [parsed]
        while self._match(TokenType.STRING):
            parts.append(exp.Literal.string(self._prev.text))

        return self.expression(exp.Concat, expressions=parts) if len(parts) > 1 else parsed

    if self._match_pair(TokenType.DOT, TokenType.NUMBER):
        return exp.Literal.number(f"0.{self._prev.text}")

    if not self._match(TokenType.L_PAREN):
        return None

    comments = self._prev_comments
    query = self._parse_select()
    expressions = [query] if query else self._parse_expressions()

    this = self._parse_query_modifiers(seq_get(expressions, 0))

    if not this and self._match(TokenType.R_PAREN, advance=False):
        # Nothing between the parentheses: an empty tuple
        this = self.expression(exp.Tuple)
    elif isinstance(this, exp.UNWRAPPED_QUERIES):
        this = self._parse_subquery(this=this, parse_alias=False)
    elif isinstance(this, exp.Subquery):
        with_set_ops = self._parse_set_operations(this)
        this = self._parse_subquery(this=with_set_ops, parse_alias=False)
    elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
        this = self.expression(exp.Tuple, expressions=expressions)
    else:
        this = self.expression(exp.Paren, this=this)

    if this:
        this.add_comments(comments)

    self._match_r_paren(expression=this)
    return this
def _parse_function_call(
    self,
    functions: t.Optional[t.Dict[str, t.Callable]] = None,
    anonymous: bool = False,
    optional_parens: bool = True,
    any_token: bool = False,
) -> t.Optional[exp.Expression]:
    """
    Parses a function call that starts at the current token.

    Args:
        functions: Mapping from upper-cased function name to its builder. Defaults
            to `self.FUNCTIONS` when None.
        anonymous: If set, dedicated `FUNCTION_PARSERS` entries and known builders are
            bypassed and the call is built as `exp.Anonymous`.
        optional_parens: Whether functions that may be written without parentheses
            (`NO_PAREN_FUNCTION_PARSERS` / `NO_PAREN_FUNCTIONS`) are accepted.
        any_token: If set, any token type outside `RESERVED_TOKENS` may serve as the
            function name; otherwise the token type must be in `FUNC_TOKENS`.

    Returns:
        The parsed expression, or None if no function call starts at the current token.
    """
    if not self._curr:
        return None

    comments = self._curr.comments
    token = self._curr
    token_type = self._curr.token_type
    this = self._curr.text
    upper = this.upper()

    parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
    if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
        self._advance()
        return self._parse_window(parser(self))

    if not self._next or self._next.token_type != TokenType.L_PAREN:
        if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
            self._advance()
            return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

        return None

    if any_token:
        if token_type in self.RESERVED_TOKENS:
            return None
    elif token_type not in self.FUNC_TOKENS:
        return None

    # Skip the function name and the opening parenthesis
    self._advance(2)

    parser = self.FUNCTION_PARSERS.get(upper)
    if parser and not anonymous:
        this = parser(self)
    else:
        subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

        # `self._curr` is None when the input ends right after the opening parenthesis;
        # guard it so that case falls through to the generic path below (and its
        # closing-parenthesis check) instead of failing with an AttributeError
        if (
            subquery_predicate
            and self._curr
            and self._curr.token_type in (TokenType.SELECT, TokenType.WITH)
        ):
            this = self.expression(
                subquery_predicate, comments=comments, this=self._parse_select()
            )
            self._match_r_paren()
            return this

        if functions is None:
            functions = self.FUNCTIONS

        function = functions.get(upper)
        known_function = function and not anonymous

        alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS
        args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

        post_func_comments = self._curr and self._curr.comments
        if known_function and post_func_comments:
            # If the user-inputted comment "/* sqlglot.anonymous */" is following the function
            # call we'll construct it as exp.Anonymous, even if it's "known"
            if any(
                comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS)
                for comment in post_func_comments
            ):
                known_function = False

        if alias and known_function:
            args = self._kv_to_prop_eq(args)

        if known_function:
            func_builder = t.cast(t.Callable, function)

            # Builders that declare a `dialect` variable receive the active dialect
            if "dialect" in func_builder.__code__.co_varnames:
                func = func_builder(args, dialect=self.dialect)
            else:
                func = func_builder(args)

            func = self.validate_expression(func, args)
            if self.dialect.PRESERVE_ORIGINAL_NAMES:
                func.meta["name"] = this

            this = func
        else:
            if token_type == TokenType.IDENTIFIER:
                this = exp.Identifier(this=this, quoted=True).update_positions(token)
            this = self.expression(exp.Anonymous, this=this, expressions=args)

    if isinstance(this, exp.Expression):
        this.add_comments(comments)

    self._match_r_paren(this)
    return self._parse_window(this)
def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
    """
    Rewrites key-value style arguments as `exp.PropertyEQ` nodes.

    Entries that are instances of `KEY_VALUE_DEFINITIONS` are converted here; every
    other entry is handed to `_to_prop_eq` together with its position.

    Args:
        expressions: The argument expressions to normalize.

    Returns:
        A new list with one (possibly rewritten) entry per input expression.
    """
    result = []

    for position, node in enumerate(expressions):
        if not isinstance(node, self.KEY_VALUE_DEFINITIONS):
            result.append(self._to_prop_eq(node, position))
            continue

        if isinstance(node, exp.Alias):
            # `value AS key` -> key = value
            node = self.expression(
                exp.PropertyEQ, this=node.args.get("alias"), expression=node.this
            )

        if not isinstance(node, exp.PropertyEQ):
            node = self.expression(
                exp.PropertyEQ,
                this=exp.to_identifier(node.this.name),
                expression=node.expression,
            )

        if isinstance(node.this, exp.Column):
            # Keys are plain identifiers, so unwrap the column
            node.this.replace(node.this.this)

        result.append(node)

    return result
def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
    """
    Parses a lambda, falling back to a regular function argument.

    The parameter list (either `(a, b, ...)` or a single argument) is parsed
    speculatively. If no token from `LAMBDAS` follows, the parser rewinds and reads
    a `DISTINCT` list or a select/expression, followed by its optional modifiers.

    Args:
        alias: Forwarded to `_parse_select_or_expression` on the fallback path.

    Returns:
        The lambda built by the matching `LAMBDAS` entry, or the parsed argument.
    """
    start = self._index

    if not self._match(TokenType.L_PAREN):
        params = [self._parse_lambda_arg()]
    else:
        params = t.cast(
            t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
        )

        if not self._match(TokenType.R_PAREN):
            self._retreat(start)

    if self._match_set(self.LAMBDAS):
        return self.LAMBDAS[self._prev.token_type](self, params)

    # Not a lambda after all: rewind and parse an ordinary argument
    self._retreat(start)

    argument: t.Optional[exp.Expression]

    if self._match(TokenType.DISTINCT):
        argument = self.expression(
            exp.Distinct, expressions=self._parse_csv(self._parse_assignment)
        )
    else:
        argument = self._parse_select_or_expression(alias=alias)

    argument = self._parse_respect_or_ignore_nulls(argument)
    argument = self._parse_having_max(argument)
    argument = self._parse_order(argument)
    return self._parse_limit(argument)
def _parse_column_def(
    self, this: t.Optional[exp.Expression], computed_column: bool = True
) -> t.Optional[exp.Expression]:
    """
    Parses the type and constraints that follow a column name.

    Args:
        this: The already-parsed column name; an `exp.Column` is unwrapped to its identifier.
        computed_column: When False, a leading `ALIAS` token is consumed before the type.

    Returns:
        An `exp.ColumnDef`, or `this` unchanged when neither a type nor any constraint
        was found.
    """
    # column defs are not really columns, they're identifiers
    if isinstance(this, exp.Column):
        this = this.this

    if not computed_column:
        self._match(TokenType.ALIAS)

    kind = self._parse_types(schema=True)

    if self._match_text_seq("FOR", "ORDINALITY"):
        return self.expression(exp.ColumnDef, this=this, ordinality=True)

    constraints: t.List[exp.Expression] = []

    if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
        ("ALIAS", "MATERIALIZED")
    ):
        # Sub-parsers run in this exact order, each one consumes tokens
        materialized = self._prev.text.upper() == "MATERIALIZED"
        computed_expr = self._parse_assignment()
        persisted = materialized or self._match_text_seq("PERSISTED")
        not_null = self._match_pair(TokenType.NOT, TokenType.NULL)

        computed = exp.ComputedColumnConstraint(
            this=computed_expr, persisted=persisted, not_null=not_null
        )
        constraints.append(self.expression(exp.ColumnConstraint, kind=computed))
    elif (
        kind
        and self._match(TokenType.ALIAS, advance=False)
        and (
            not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
            or (self._next and self._next.token_type == TokenType.L_PAREN)
        )
    ):
        self._advance()
        transform = exp.TransformColumnConstraint(this=self._parse_disjunction())
        constraints.append(self.expression(exp.ColumnConstraint, kind=transform))

    constraint = self._parse_column_constraint()
    while constraint:
        constraints.append(constraint)
        constraint = self._parse_column_constraint()

    if kind or constraints:
        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    return this
def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
    """
    Parses an inline length constraint.

    An optional `LENGTH` keyword is consumed, then the length itself is parsed
    with `_parse_bitwise`.
    """
    self._match_text_seq("LENGTH")
    length = self._parse_bitwise()
    return self.expression(exp.InlineLengthColumnConstraint, this=length)
def _parse_unnamed_constraint(
    self, constraints: t.Optional[t.Collection[str]] = None
) -> t.Optional[exp.Expression]:
    """
    Parses a constraint that is not introduced by `CONSTRAINT <name>`.

    Args:
        constraints: The constraint keywords accepted at this position. Defaults to
            every key of `CONSTRAINT_PARSERS`.

    Returns:
        The expression built by the matching constraint parser, or None if the current
        token is an identifier, matches no accepted keyword, or has no registered parser.
    """
    if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
        constraints or self.CONSTRAINT_PARSERS
    ):
        return None

    constraint = self._prev.text.upper()
    parser = self.CONSTRAINT_PARSERS.get(constraint)
    if not parser:
        self.raise_error(f"No parser found for schema constraint {constraint}.")
        # NOTE(review): other call sites keep executing after raise_error, so it is
        # assumed it may return instead of raising; bail out rather than hitting a
        # KeyError on the lookup
        return None

    return parser(self)
def _parse_key_constraint_options(self) -> t.List[str]:
    """
    Collects the options that trail a key constraint.

    Two kinds of entries are gathered until neither matches or the input ends:
      - `ON <event> <action>` clauses, rendered as the string "ON <event> <action>",
      - names parsed from `KEY_CONSTRAINT_OPTIONS`.

    Returns:
        The option strings, in the order they were encountered.
    """
    # Tried in order, the first matcher that succeeds determines the action
    action_matchers = (
        ("NO ACTION", lambda: self._match_text_seq("NO", "ACTION")),
        ("CASCADE", lambda: self._match_text_seq("CASCADE")),
        ("RESTRICT", lambda: self._match_text_seq("RESTRICT")),
        ("SET NULL", lambda: self._match_pair(TokenType.SET, TokenType.NULL)),
        ("SET DEFAULT", lambda: self._match_pair(TokenType.SET, TokenType.DEFAULT)),
    )

    collected = []

    while self._curr:
        if not self._match(TokenType.ON):
            option = self._parse_var_from_options(
                self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False
            )
            if not option:
                break

            collected.append(option.name)
            continue

        on = self._advance_any() and self._prev.text

        action = next((label for label, matches in action_matchers if matches()), None)
        if action is None:
            self.raise_error("Invalid key constraint")

        collected.append(f"ON {on} {action}")

    return collected
def _parse_primary_key(
    self, wrapped_optional: bool = False, in_props: bool = False
) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
    """
    Parses what follows `PRIMARY KEY`.

    Args:
        wrapped_optional: Passed as `optional` to `_parse_wrapped_csv` for the key list.
        in_props: When set, the key list form is parsed even if no `(` follows.

    Returns:
        `exp.PrimaryKey` for the key list form, otherwise a column-level
        `exp.PrimaryKeyColumnConstraint`.
    """
    desc = (
        self._match_set((TokenType.ASC, TokenType.DESC))
        and self._prev.token_type == TokenType.DESC
    )

    if in_props or self._match(TokenType.L_PAREN, advance=False):
        key_parts = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        key_options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=key_parts, options=key_options)

    return self.expression(
        exp.PrimaryKeyColumnConstraint,
        desc=desc,
        options=self._parse_key_constraint_options(),
    )
def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
    """
    Parses `[...]` / `{...}` constructs that follow (or stand in for) an expression.

    Depending on the opening token and on `this`, the result is an ODBC datetime
    literal, a struct, an array constructor or a subscript (`exp.Bracket`). Chained
    brackets are handled by recursing on the result.

    Args:
        this: The expression preceding the bracket, if any.

    Returns:
        The parsed expression, or `this` unchanged if no bracket or brace follows.
    """
    if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
        return this

    # Only these two token types can have matched above
    bracket_kind = self._prev.token_type
    is_brace = bracket_kind == TokenType.L_BRACE

    if (
        is_brace
        and self._curr
        and self._curr.token_type == TokenType.VAR
        and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS
    ):
        return self._parse_odbc_datetime_literal()

    expressions = self._parse_csv(lambda: self._parse_bracket_key_value(is_map=is_brace))

    if is_brace:
        if not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")
    elif not self._match(TokenType.R_BRACKET):
        self.raise_error("Expected ]")

    if is_brace:
        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
    elif not this:
        this = build_array_constructor(
            exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect
        )
    else:
        constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
        if constructor_type:
            return build_array_constructor(
                constructor_type,
                args=expressions,
                bracket_kind=bracket_kind,
                dialect=self.dialect,
            )

        # Subscript: shift the indices by the dialect's index offset
        expressions = apply_index_offset(
            this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect
        )
        this = self.expression(exp.Bracket, this=this, expressions=expressions)

    self._add_comments(this)
    return self._parse_bracket(this)
def _parse_extract(self) -> exp.Extract | exp.Anonymous:
    """
    Parses the arguments of `EXTRACT`.

    Both `EXTRACT(<part> FROM <expr>)` and `EXTRACT(<part>, <expr>)` are accepted;
    any other separator is reported through `raise_error`.
    """
    part = self._parse_function() or self._parse_var_or_string(upper=True)

    # FROM is tried first, a comma is only attempted when FROM is absent
    if not (self._match(TokenType.FROM) or self._match(TokenType.COMMA)):
        self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

    return self.expression(exp.Extract, this=part, expression=self._parse_bitwise())
def _parse_string_agg(self) -> exp.GroupConcat:
    """
    Parses the arguments of a STRING_AGG / LISTAGG style aggregate into `exp.GroupConcat`.

    Handles an optional leading `DISTINCT`, an `ON OVERFLOW` clause, an ORDER BY / LIMIT
    placed inside the parentheses, and a trailing `WITHIN GROUP (ORDER BY ...)`.
    """
    args: t.List[t.Optional[exp.Expression]]

    if not self._match(TokenType.DISTINCT):
        args = self._parse_csv(self._parse_assignment)  # type: ignore
    else:
        args = [self.expression(exp.Distinct, expressions=[self._parse_assignment()])]
        if self._match(TokenType.COMMA):
            args.extend(self._parse_csv(self._parse_assignment))

    on_overflow: t.Optional[exp.Expression] = None

    if self._match_text_seq("ON", "OVERFLOW"):
        # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior])
        if self._match_text_seq("ERROR"):
            on_overflow = exp.var("ERROR")
        else:
            self._match_text_seq("TRUNCATE")
            filler = self._parse_string()
            with_count = self._match_text_seq("WITH", "COUNT") or not self._match_text_seq(
                "WITHOUT", "COUNT"
            )
            on_overflow = self.expression(
                exp.OverflowTruncateBehavior, this=filler, with_count=with_count
            )

    index = self._index
    if not self._match(TokenType.R_PAREN) and args:
        # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
        # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
        # The order is parsed through `this` as a canonicalization for WITHIN GROUPs
        ordered = self._parse_order(this=args[0])
        args[0] = self._parse_limit(this=ordered)
        return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

    # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
    # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
    # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
    if not self._match_text_seq("WITHIN", "GROUP"):
        self._retreat(index)
        return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

    # The corresponding match_r_paren will be called in parse_function (caller)
    self._match_l_paren()

    return self.expression(
        exp.GroupConcat,
        this=self._parse_order(this=seq_get(args, 0)),
        separator=seq_get(args, 1),
        on_overflow=on_overflow,
    )
6471 else: 6472 uri = self._parse_alias(self._parse_string()) 6473 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6474 if not self._match(TokenType.COMMA): 6475 break 6476 6477 return namespaces 6478 6479 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 6480 """ 6481 There are generally two variants of the DECODE function: 6482 6483 - DECODE(bin, charset) 6484 - DECODE(expression, search, result [, search, result] ... [, default]) 6485 6486 The second variant will always be parsed into a CASE expression. Note that NULL 6487 needs special treatment, since we need to explicitly check for it with `IS NULL`, 6488 instead of relying on pattern matching. 6489 """ 6490 args = self._parse_csv(self._parse_assignment) 6491 6492 if len(args) < 3: 6493 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6494 6495 expression, *expressions = args 6496 if not expression: 6497 return None 6498 6499 ifs = [] 6500 for search, result in zip(expressions[::2], expressions[1::2]): 6501 if not search or not result: 6502 return None 6503 6504 if isinstance(search, exp.Literal): 6505 ifs.append( 6506 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 6507 ) 6508 elif isinstance(search, exp.Null): 6509 ifs.append( 6510 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 6511 ) 6512 else: 6513 cond = exp.or_( 6514 exp.EQ(this=expression.copy(), expression=search), 6515 exp.and_( 6516 exp.Is(this=expression.copy(), expression=exp.Null()), 6517 exp.Is(this=search.copy(), expression=exp.Null()), 6518 copy=False, 6519 ), 6520 copy=False, 6521 ) 6522 ifs.append(exp.If(this=cond, true=result)) 6523 6524 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6525 6526 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6527 self._match_text_seq("KEY") 6528 key = self._parse_column() 6529 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 
6530 self._match_text_seq("VALUE") 6531 value = self._parse_bitwise() 6532 6533 if not key and not value: 6534 return None 6535 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6536 6537 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6538 if not this or not self._match_text_seq("FORMAT", "JSON"): 6539 return this 6540 6541 return self.expression(exp.FormatJson, this=this) 6542 6543 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6544 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6545 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6546 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6547 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6548 else: 6549 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6550 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6551 6552 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6553 6554 if not empty and not error and not null: 6555 return None 6556 6557 return self.expression( 6558 exp.OnCondition, 6559 empty=empty, 6560 error=error, 6561 null=null, 6562 ) 6563 6564 def _parse_on_handling( 6565 self, on: str, *values: str 6566 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6567 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6568 for value in values: 6569 if self._match_text_seq(value, "ON", on): 6570 return f"{value} ON {on}" 6571 6572 index = self._index 6573 if self._match(TokenType.DEFAULT): 6574 default_value = self._parse_bitwise() 6575 if self._match_text_seq("ON", on): 6576 return default_value 6577 6578 self._retreat(index) 6579 6580 return None 6581 6582 @t.overload 6583 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6584 6585 @t.overload 6586 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
# Note: this is currently incomplete; it only implements the "JSON_value_column" part
def _parse_json_column_def(self) -> exp.JSONColumnDef:
    """Parse one JSON column definition, either a plain column or a NESTED one.

    A plain column has a name and a type; a NESTED column has neither and
    instead carries a nested schema. Both may have a `PATH <string>` part.
    """
    is_nested = self._match_text_seq("NESTED")

    name = None
    kind = None
    if not is_nested:
        name = self._parse_id_var()
        kind = self._parse_types(allow_identifiers=False)

    path = self._match_text_seq("PATH")
    if path:
        path = self._parse_string()

    # Only NESTED columns carry a schema of their own
    nested_schema = self._parse_json_schema() if is_nested else None

    return self.expression(
        exp.JSONColumnDef,
        this=name,
        kind=kind,
        path=path,
        nested_schema=nested_schema,
    )
def _parse_match_against(self) -> exp.MatchAgainst:
    """Parse `<columns>) AGAINST (<string> [<search modifier>]` into a MatchAgainst node.

    The recognised modifiers are IN NATURAL LANGUAGE MODE (optionally followed
    by WITH QUERY EXPANSION), IN BOOLEAN MODE, and WITH QUERY EXPANSION alone.
    """
    columns = self._parse_csv(self._parse_column)

    self._match_text_seq(")", "AGAINST", "(")

    search_string = self._parse_string()

    modifier = None
    if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
        parts = ["IN NATURAL LANGUAGE MODE"]
        if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            parts.append("WITH QUERY EXPANSION")
        modifier = " ".join(parts)
    elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
        modifier = "IN BOOLEAN MODE"
    elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
        modifier = "WITH QUERY EXPANSION"

    return self.expression(
        exp.MatchAgainst, this=search_string, expressions=columns, modifier=modifier
    )
def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
    """Parse the arguments of a string-position function into a StrPosition node.

    Two shapes are handled: `<needle> IN <haystack>`, and a comma-separated
    list whose first two items are the needle and haystack (in the order given
    by `haystack_first`) and whose optional third item is the start position.

    Args:
        haystack_first: When True, the first listed argument is the haystack
            and the second the needle; otherwise the order is reversed.
    """
    args = self._parse_csv(self._parse_bitwise)

    if self._match(TokenType.IN):
        return self.expression(
            exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
        )

    haystack_index, needle_index = (0, 1) if haystack_first else (1, 0)

    return self.expression(
        exp.StrPosition,
        this=seq_get(args, haystack_index),
        substr=seq_get(args, needle_index),
        position=seq_get(args, 2),
    )
6764 collation = None 6765 expression = None 6766 6767 if self._match_texts(self.TRIM_TYPES): 6768 position = self._prev.text.upper() 6769 6770 this = self._parse_bitwise() 6771 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6772 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6773 expression = self._parse_bitwise() 6774 6775 if invert_order: 6776 this, expression = expression, this 6777 6778 if self._match(TokenType.COLLATE): 6779 collation = self._parse_bitwise() 6780 6781 return self.expression( 6782 exp.Trim, this=this, position=position, expression=expression, collation=collation 6783 ) 6784 6785 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6786 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6787 6788 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6789 return self._parse_window(self._parse_id_var(), alias=True) 6790 6791 def _parse_respect_or_ignore_nulls( 6792 self, this: t.Optional[exp.Expression] 6793 ) -> t.Optional[exp.Expression]: 6794 if self._match_text_seq("IGNORE", "NULLS"): 6795 return self.expression(exp.IgnoreNulls, this=this) 6796 if self._match_text_seq("RESPECT", "NULLS"): 6797 return self.expression(exp.RespectNulls, this=this) 6798 return this 6799 6800 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6801 if self._match(TokenType.HAVING): 6802 self._match_texts(("MAX", "MIN")) 6803 max = self._prev.text.upper() != "MIN" 6804 return self.expression( 6805 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6806 ) 6807 6808 return this 6809 6810 def _parse_window( 6811 self, this: t.Optional[exp.Expression], alias: bool = False 6812 ) -> t.Optional[exp.Expression]: 6813 func = this 6814 comments = func.comments if isinstance(func, exp.Expression) else None 6815 6816 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 
6817 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6818 if self._match_text_seq("WITHIN", "GROUP"): 6819 order = self._parse_wrapped(self._parse_order) 6820 this = self.expression(exp.WithinGroup, this=this, expression=order) 6821 6822 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6823 self._match(TokenType.WHERE) 6824 this = self.expression( 6825 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6826 ) 6827 self._match_r_paren() 6828 6829 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6830 # Some dialects choose to implement and some do not. 6831 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6832 6833 # There is some code above in _parse_lambda that handles 6834 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6835 6836 # The below changes handle 6837 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6838 6839 # Oracle allows both formats 6840 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6841 # and Snowflake chose to do the same for familiarity 6842 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6843 if isinstance(this, exp.AggFunc): 6844 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6845 6846 if ignore_respect and ignore_respect is not this: 6847 ignore_respect.replace(ignore_respect.this) 6848 this = self.expression(ignore_respect.__class__, this=this) 6849 6850 this = self._parse_respect_or_ignore_nulls(this) 6851 6852 # bigquery select from window x AS (partition by ...) 
6853 if alias: 6854 over = None 6855 self._match(TokenType.ALIAS) 6856 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6857 return this 6858 else: 6859 over = self._prev.text.upper() 6860 6861 if comments and isinstance(func, exp.Expression): 6862 func.pop_comments() 6863 6864 if not self._match(TokenType.L_PAREN): 6865 return self.expression( 6866 exp.Window, 6867 comments=comments, 6868 this=this, 6869 alias=self._parse_id_var(False), 6870 over=over, 6871 ) 6872 6873 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6874 6875 first = self._match(TokenType.FIRST) 6876 if self._match_text_seq("LAST"): 6877 first = False 6878 6879 partition, order = self._parse_partition_and_order() 6880 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6881 6882 if kind: 6883 self._match(TokenType.BETWEEN) 6884 start = self._parse_window_spec() 6885 self._match(TokenType.AND) 6886 end = self._parse_window_spec() 6887 6888 spec = self.expression( 6889 exp.WindowSpec, 6890 kind=kind, 6891 start=start["value"], 6892 start_side=start["side"], 6893 end=end["value"], 6894 end_side=end["side"], 6895 ) 6896 else: 6897 spec = None 6898 6899 self._match_r_paren() 6900 6901 window = self.expression( 6902 exp.Window, 6903 comments=comments, 6904 this=this, 6905 partition_by=partition, 6906 order=order, 6907 spec=spec, 6908 alias=window_alias, 6909 over=over, 6910 first=first, 6911 ) 6912 6913 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
def _parse_alias(
    self, this: t.Optional[exp.Expression], explicit: bool = False
) -> t.Optional[exp.Expression]:
    """Wrap `this` in an alias node when an alias follows it.

    Args:
        this: The expression that may be aliased.
        explicit: When True, an alias is only accepted if the ALIAS token
            (as in `expr AS alias`) is present.

    Returns:
        `this` unchanged when no alias is parsed, an `exp.Aliases` node for a
        parenthesized list of aliases, or an `exp.Alias` node for a single one.
    """
    # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
    # so this section tries to parse the clause version and if it fails, it treats the token
    # as an identifier (alias)
    if self._can_parse_limit_or_offset():
        return this

    any_token = self._match(TokenType.ALIAS)

    # Copy the list: `self._prev_comments or []` would hand back the very list
    # object held in `_prev_comments`, and the `extend` below would then mutate
    # it in place, so a later re-parse of the same tokens would see the alias
    # comments appended to it.
    comments = list(self._prev_comments or [])

    if explicit and not any_token:
        return this

    if self._match(TokenType.L_PAREN):
        aliases = self.expression(
            exp.Aliases,
            comments=comments,
            this=this,
            expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
        )
        self._match_r_paren(aliases)
        return aliases

    alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
        self.STRING_ALIASES and self._parse_string_as_identifier()
    )

    if alias:
        comments.extend(alias.pop_comments())
        this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
        column = this.this

        # Moves the comment next to the alias in `expr /* comment */ AS alias`
        if not this.comments and column and column.comments:
            this.comments = column.pop_comments()

    return this
ignore_reserved: bool = False) -> t.Optional[Token]: 7028 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7029 self._advance() 7030 return self._prev 7031 return None 7032 7033 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7034 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7035 7036 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7037 return self._parse_primary() or self._parse_var(any_token=True) 7038 7039 def _parse_null(self) -> t.Optional[exp.Expression]: 7040 if self._match_set(self.NULL_TOKENS): 7041 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7042 return self._parse_placeholder() 7043 7044 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7045 if self._match(TokenType.TRUE): 7046 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7047 if self._match(TokenType.FALSE): 7048 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7049 return self._parse_placeholder() 7050 7051 def _parse_star(self) -> t.Optional[exp.Expression]: 7052 if self._match(TokenType.STAR): 7053 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7054 return self._parse_placeholder() 7055 7056 def _parse_parameter(self) -> exp.Parameter: 7057 this = self._parse_identifier() or self._parse_primary_or_var() 7058 return self.expression(exp.Parameter, this=this) 7059 7060 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7061 if self._match_set(self.PLACEHOLDER_PARSERS): 7062 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7063 if placeholder: 7064 return placeholder 7065 self._advance(-1) 7066 return None 7067 7068 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7069 if not self._match_texts(keywords): 7070 return None 7071 if self._match(TokenType.L_PAREN, advance=False): 7072 return self._parse_wrapped_csv(self._parse_expression) 7073 7074 expression = 
self._parse_expression() 7075 return [expression] if expression else None 7076 7077 def _parse_csv( 7078 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7079 ) -> t.List[exp.Expression]: 7080 parse_result = parse_method() 7081 items = [parse_result] if parse_result is not None else [] 7082 7083 while self._match(sep): 7084 self._add_comments(parse_result) 7085 parse_result = parse_method() 7086 if parse_result is not None: 7087 items.append(parse_result) 7088 7089 return items 7090 7091 def _parse_tokens( 7092 self, parse_method: t.Callable, expressions: t.Dict 7093 ) -> t.Optional[exp.Expression]: 7094 this = parse_method() 7095 7096 while self._match_set(expressions): 7097 this = self.expression( 7098 expressions[self._prev.token_type], 7099 this=this, 7100 comments=self._prev_comments, 7101 expression=parse_method(), 7102 ) 7103 7104 return this 7105 7106 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7107 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7108 7109 def _parse_wrapped_csv( 7110 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 7111 ) -> t.List[exp.Expression]: 7112 return self._parse_wrapped( 7113 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7114 ) 7115 7116 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7117 wrapped = self._match(TokenType.L_PAREN) 7118 if not wrapped and not optional: 7119 self.raise_error("Expecting (") 7120 parse_result = parse_method() 7121 if wrapped: 7122 self._match_r_paren() 7123 return parse_result 7124 7125 def _parse_expressions(self) -> t.List[exp.Expression]: 7126 return self._parse_csv(self._parse_expression) 7127 7128 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7129 return self._parse_select() or self._parse_set_operations( 7130 self._parse_alias(self._parse_assignment(), explicit=True) 7131 if alias 
def _parse_transaction(self) -> exp.Transaction | exp.Command:
    """Parse the remainder of a transaction statement into an `exp.Transaction`.

    Consumes an optional transaction kind, an optional TRANSACTION / WORK
    keyword, and then a comma-separated list of modes, each mode being a run
    of VAR tokens joined with single spaces.
    """
    kind = self._prev.text if self._match_texts(self.TRANSACTION_KIND) else None

    self._match_texts(("TRANSACTION", "WORK"))

    modes = []
    more = True
    while more:
        words = []
        while self._match(TokenType.VAR):
            words.append(self._prev.text)

        if words:
            modes.append(" ".join(words))

        # Another mode may only follow a comma
        more = self._match(TokenType.COMMA)

    return self.expression(exp.Transaction, this=kind, modes=modes)
https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7196 if self._match_texts(("FIRST", "AFTER")): 7197 position = self._prev.text 7198 column_position = self.expression( 7199 exp.ColumnPosition, this=self._parse_column(), position=position 7200 ) 7201 expression.set("position", column_position) 7202 7203 return expression 7204 7205 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7206 drop = self._match(TokenType.DROP) and self._parse_drop() 7207 if drop and not isinstance(drop, exp.Command): 7208 drop.set("kind", drop.args.get("kind", "COLUMN")) 7209 return drop 7210 7211 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7212 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7213 return self.expression( 7214 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7215 ) 7216 7217 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7218 index = self._index - 1 7219 7220 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7221 return self._parse_csv( 7222 lambda: self.expression( 7223 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7224 ) 7225 ) 7226 7227 self._retreat(index) 7228 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 7229 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 7230 7231 if self._match_text_seq("ADD", "COLUMNS"): 7232 schema = self._parse_schema() 7233 if schema: 7234 return [schema] 7235 return [] 7236 7237 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 7238 7239 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7240 if self._match_texts(self.ALTER_ALTER_PARSERS): 7241 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7242 7243 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7244 # keyword after ALTER we default to parsing 
def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
    """Parse the value of a distribution-style change into an AlterDistStyle node.

    The value is either one of the keywords ALL / EVEN / AUTO (stored as an
    upper-cased var) or a column, optionally preceded by `KEY DISTKEY`.
    """
    if self._match_texts(("ALL", "EVEN", "AUTO")):
        style = exp.var(self._prev.text.upper())
    else:
        self._match_text_seq("KEY", "DISTKEY")
        style = self._parse_column()

    return self.expression(exp.AlterDistStyle, this=style)
def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]:
    """Parse a RENAME action: either a column rename or a rename of the object itself.

    Returns:
        An `exp.RenameColumn` for `COLUMN [exists] <old> TO <new>`, None when
        that form is missing its old column, TO keyword, or new column, and an
        `exp.AlterRename` for the `[TO] <table>` form.
    """
    if not self._match(TokenType.COLUMN):
        self._match_text_seq("TO")
        new_table = self._parse_table(schema=True)
        return self.expression(exp.AlterRename, this=new_table)

    exists = self._parse_exists()
    source = self._parse_column()
    saw_to = self._match_text_seq("TO")
    destination = self._parse_column()

    if any(part is None for part in (source, saw_to, destination)):
        return None

    return self.expression(exp.RenameColumn, this=source, to=destination, exists=exists)
alter_set.set("location", self._parse_field()) 7344 elif self._match_text_seq("ACCESS", "METHOD"): 7345 alter_set.set("access_method", self._parse_field()) 7346 elif self._match_text_seq("TABLESPACE"): 7347 alter_set.set("tablespace", self._parse_field()) 7348 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7349 alter_set.set("file_format", [self._parse_field()]) 7350 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7351 alter_set.set("file_format", self._parse_wrapped_options()) 7352 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7353 alter_set.set("copy_options", self._parse_wrapped_options()) 7354 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7355 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7356 else: 7357 if self._match_text_seq("SERDE"): 7358 alter_set.set("serde", self._parse_field()) 7359 7360 alter_set.set("expressions", [self._parse_properties()]) 7361 7362 return alter_set 7363 7364 def _parse_alter(self) -> exp.Alter | exp.Command: 7365 start = self._prev 7366 7367 alter_token = self._match_set(self.ALTERABLES) and self._prev 7368 if not alter_token: 7369 return self._parse_as_command(start) 7370 7371 exists = self._parse_exists() 7372 only = self._match_text_seq("ONLY") 7373 this = self._parse_table(schema=True) 7374 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7375 7376 if self._next: 7377 self._advance() 7378 7379 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7380 if parser: 7381 actions = ensure_list(parser(self)) 7382 not_valid = self._match_text_seq("NOT", "VALID") 7383 options = self._parse_csv(self._parse_property) 7384 7385 if not self._curr and actions: 7386 return self.expression( 7387 exp.Alter, 7388 this=this, 7389 kind=alter_token.text.upper(), 7390 exists=exists, 7391 actions=actions, 7392 only=only, 7393 options=options, 7394 cluster=cluster, 7395 not_valid=not_valid, 7396 ) 7397 7398 return 
self._parse_as_command(start) 7399 7400 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7401 start = self._prev 7402 # https://duckdb.org/docs/sql/statements/analyze 7403 if not self._curr: 7404 return self.expression(exp.Analyze) 7405 7406 options = [] 7407 while self._match_texts(self.ANALYZE_STYLES): 7408 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7409 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7410 else: 7411 options.append(self._prev.text.upper()) 7412 7413 this: t.Optional[exp.Expression] = None 7414 inner_expression: t.Optional[exp.Expression] = None 7415 7416 kind = self._curr and self._curr.text.upper() 7417 7418 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7419 this = self._parse_table_parts() 7420 elif self._match_text_seq("TABLES"): 7421 if self._match_set((TokenType.FROM, TokenType.IN)): 7422 kind = f"{kind} {self._prev.text.upper()}" 7423 this = self._parse_table(schema=True, is_db_reference=True) 7424 elif self._match_text_seq("DATABASE"): 7425 this = self._parse_table(schema=True, is_db_reference=True) 7426 elif self._match_text_seq("CLUSTER"): 7427 this = self._parse_table() 7428 # Try matching inner expr keywords before fallback to parse table. 
7429 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7430 kind = None 7431 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7432 else: 7433 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7434 kind = None 7435 this = self._parse_table_parts() 7436 7437 partition = self._try_parse(self._parse_partition) 7438 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7439 return self._parse_as_command(start) 7440 7441 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7442 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7443 "WITH", "ASYNC", "MODE" 7444 ): 7445 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 7446 else: 7447 mode = None 7448 7449 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7450 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7451 7452 properties = self._parse_properties() 7453 return self.expression( 7454 exp.Analyze, 7455 kind=kind, 7456 this=this, 7457 mode=mode, 7458 partition=partition, 7459 properties=properties, 7460 expression=inner_expression, 7461 options=options, 7462 ) 7463 7464 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7465 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7466 this = None 7467 kind = self._prev.text.upper() 7468 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7469 expressions = [] 7470 7471 if not self._match_text_seq("STATISTICS"): 7472 self.raise_error("Expecting token STATISTICS") 7473 7474 if self._match_text_seq("NOSCAN"): 7475 this = "NOSCAN" 7476 elif self._match(TokenType.FOR): 7477 if self._match_text_seq("ALL", "COLUMNS"): 7478 this = "FOR ALL COLUMNS" 7479 if self._match_texts("COLUMNS"): 7480 this = "FOR COLUMNS" 7481 expressions = self._parse_csv(self._parse_column_reference) 7482 elif self._match_text_seq("SAMPLE"): 7483 sample = self._parse_number() 
7484 expressions = [ 7485 self.expression( 7486 exp.AnalyzeSample, 7487 sample=sample, 7488 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7489 ) 7490 ] 7491 7492 return self.expression( 7493 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7494 ) 7495 7496 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7497 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7498 kind = None 7499 this = None 7500 expression: t.Optional[exp.Expression] = None 7501 if self._match_text_seq("REF", "UPDATE"): 7502 kind = "REF" 7503 this = "UPDATE" 7504 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7505 this = "UPDATE SET DANGLING TO NULL" 7506 elif self._match_text_seq("STRUCTURE"): 7507 kind = "STRUCTURE" 7508 if self._match_text_seq("CASCADE", "FAST"): 7509 this = "CASCADE FAST" 7510 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7511 ("ONLINE", "OFFLINE") 7512 ): 7513 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7514 expression = self._parse_into() 7515 7516 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7517 7518 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7519 this = self._prev.text.upper() 7520 if self._match_text_seq("COLUMNS"): 7521 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7522 return None 7523 7524 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7525 kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7526 if self._match_text_seq("STATISTICS"): 7527 return self.expression(exp.AnalyzeDelete, kind=kind) 7528 return None 7529 7530 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7531 if self._match_text_seq("CHAINED", "ROWS"): 7532 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7533 return None 7534 7535 # 
https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7536 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7537 this = self._prev.text.upper() 7538 expression: t.Optional[exp.Expression] = None 7539 expressions = [] 7540 update_options = None 7541 7542 if self._match_text_seq("HISTOGRAM", "ON"): 7543 expressions = self._parse_csv(self._parse_column_reference) 7544 with_expressions = [] 7545 while self._match(TokenType.WITH): 7546 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7547 if self._match_texts(("SYNC", "ASYNC")): 7548 if self._match_text_seq("MODE", advance=False): 7549 with_expressions.append(f"{self._prev.text.upper()} MODE") 7550 self._advance() 7551 else: 7552 buckets = self._parse_number() 7553 if self._match_text_seq("BUCKETS"): 7554 with_expressions.append(f"{buckets} BUCKETS") 7555 if with_expressions: 7556 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7557 7558 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7559 TokenType.UPDATE, advance=False 7560 ): 7561 update_options = self._prev.text.upper() 7562 self._advance() 7563 elif self._match_text_seq("USING", "DATA"): 7564 expression = self.expression(exp.UsingData, this=self._parse_string()) 7565 7566 return self.expression( 7567 exp.AnalyzeHistogram, 7568 this=this, 7569 expressions=expressions, 7570 expression=expression, 7571 update_options=update_options, 7572 ) 7573 7574 def _parse_merge(self) -> exp.Merge: 7575 self._match(TokenType.INTO) 7576 target = self._parse_table() 7577 7578 if target and self._match(TokenType.ALIAS, advance=False): 7579 target.set("alias", self._parse_table_alias()) 7580 7581 self._match(TokenType.USING) 7582 using = self._parse_table() 7583 7584 self._match(TokenType.ON) 7585 on = self._parse_assignment() 7586 7587 return self.expression( 7588 exp.Merge, 7589 this=target, 7590 using=using, 7591 on=on, 7592 whens=self._parse_when_matched(), 7593 
returning=self._parse_returning(), 7594 ) 7595 7596 def _parse_when_matched(self) -> exp.Whens: 7597 whens = [] 7598 7599 while self._match(TokenType.WHEN): 7600 matched = not self._match(TokenType.NOT) 7601 self._match_text_seq("MATCHED") 7602 source = ( 7603 False 7604 if self._match_text_seq("BY", "TARGET") 7605 else self._match_text_seq("BY", "SOURCE") 7606 ) 7607 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7608 7609 self._match(TokenType.THEN) 7610 7611 if self._match(TokenType.INSERT): 7612 this = self._parse_star() 7613 if this: 7614 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7615 else: 7616 then = self.expression( 7617 exp.Insert, 7618 this=exp.var("ROW") 7619 if self._match_text_seq("ROW") 7620 else self._parse_value(values=False), 7621 expression=self._match_text_seq("VALUES") and self._parse_value(), 7622 ) 7623 elif self._match(TokenType.UPDATE): 7624 expressions = self._parse_star() 7625 if expressions: 7626 then = self.expression(exp.Update, expressions=expressions) 7627 else: 7628 then = self.expression( 7629 exp.Update, 7630 expressions=self._match(TokenType.SET) 7631 and self._parse_csv(self._parse_equality), 7632 ) 7633 elif self._match(TokenType.DELETE): 7634 then = self.expression(exp.Var, this=self._prev.text) 7635 else: 7636 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7637 7638 whens.append( 7639 self.expression( 7640 exp.When, 7641 matched=matched, 7642 source=source, 7643 condition=condition, 7644 then=then, 7645 ) 7646 ) 7647 return self.expression(exp.Whens, expressions=whens) 7648 7649 def _parse_show(self) -> t.Optional[exp.Expression]: 7650 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7651 if parser: 7652 return parser(self) 7653 return self._parse_as_command(self._prev) 7654 7655 def _parse_set_item_assignment( 7656 self, kind: t.Optional[str] = None 7657 ) -> t.Optional[exp.Expression]: 7658 index = self._index 7659 7660 if kind in 
("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7661 return self._parse_set_transaction(global_=kind == "GLOBAL") 7662 7663 left = self._parse_primary() or self._parse_column() 7664 assignment_delimiter = self._match_texts(("=", "TO")) 7665 7666 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7667 self._retreat(index) 7668 return None 7669 7670 right = self._parse_statement() or self._parse_id_var() 7671 if isinstance(right, (exp.Column, exp.Identifier)): 7672 right = exp.var(right.name) 7673 7674 this = self.expression(exp.EQ, this=left, expression=right) 7675 return self.expression(exp.SetItem, this=this, kind=kind) 7676 7677 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7678 self._match_text_seq("TRANSACTION") 7679 characteristics = self._parse_csv( 7680 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7681 ) 7682 return self.expression( 7683 exp.SetItem, 7684 expressions=characteristics, 7685 kind="TRANSACTION", 7686 **{"global": global_}, # type: ignore 7687 ) 7688 7689 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7690 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7691 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7692 7693 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7694 index = self._index 7695 set_ = self.expression( 7696 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7697 ) 7698 7699 if self._curr: 7700 self._retreat(index) 7701 return self._parse_as_command(self._prev) 7702 7703 return set_ 7704 7705 def _parse_var_from_options( 7706 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7707 ) -> t.Optional[exp.Var]: 7708 start = self._curr 7709 if not start: 7710 return None 7711 7712 option = start.text.upper() 7713 continuations = options.get(option) 7714 7715 index = self._index 7716 self._advance() 7717 for 
keywords in continuations or []: 7718 if isinstance(keywords, str): 7719 keywords = (keywords,) 7720 7721 if self._match_text_seq(*keywords): 7722 option = f"{option} {' '.join(keywords)}" 7723 break 7724 else: 7725 if continuations or continuations is None: 7726 if raise_unmatched: 7727 self.raise_error(f"Unknown option {option}") 7728 7729 self._retreat(index) 7730 return None 7731 7732 return exp.var(option) 7733 7734 def _parse_as_command(self, start: Token) -> exp.Command: 7735 while self._curr: 7736 self._advance() 7737 text = self._find_sql(start, self._prev) 7738 size = len(start.text) 7739 self._warn_unsupported() 7740 return exp.Command(this=text[:size], expression=text[size:]) 7741 7742 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7743 settings = [] 7744 7745 self._match_l_paren() 7746 kind = self._parse_id_var() 7747 7748 if self._match(TokenType.L_PAREN): 7749 while True: 7750 key = self._parse_id_var() 7751 value = self._parse_primary() 7752 if not key and value is None: 7753 break 7754 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7755 self._match(TokenType.R_PAREN) 7756 7757 self._match_r_paren() 7758 7759 return self.expression( 7760 exp.DictProperty, 7761 this=this, 7762 kind=kind.this if kind else None, 7763 settings=settings, 7764 ) 7765 7766 def _parse_dict_range(self, this: str) -> exp.DictRange: 7767 self._match_l_paren() 7768 has_min = self._match_text_seq("MIN") 7769 if has_min: 7770 min = self._parse_var() or self._parse_primary() 7771 self._match_text_seq("MAX") 7772 max = self._parse_var() or self._parse_primary() 7773 else: 7774 max = self._parse_var() or self._parse_primary() 7775 min = exp.Literal.number(0) 7776 self._match_r_paren() 7777 return self.expression(exp.DictRange, this=this, min=min, max=max) 7778 7779 def _parse_comprehension( 7780 self, this: t.Optional[exp.Expression] 7781 ) -> t.Optional[exp.Comprehension]: 7782 index = self._index 7783 expression = self._parse_column() 
7784 if not self._match(TokenType.IN): 7785 self._retreat(index - 1) 7786 return None 7787 iterator = self._parse_column() 7788 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7789 return self.expression( 7790 exp.Comprehension, 7791 this=this, 7792 expression=expression, 7793 iterator=iterator, 7794 condition=condition, 7795 ) 7796 7797 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7798 if self._match(TokenType.HEREDOC_STRING): 7799 return self.expression(exp.Heredoc, this=self._prev.text) 7800 7801 if not self._match_text_seq("$"): 7802 return None 7803 7804 tags = ["$"] 7805 tag_text = None 7806 7807 if self._is_connected(): 7808 self._advance() 7809 tags.append(self._prev.text.upper()) 7810 else: 7811 self.raise_error("No closing $ found") 7812 7813 if tags[-1] != "$": 7814 if self._is_connected() and self._match_text_seq("$"): 7815 tag_text = tags[-1] 7816 tags.append("$") 7817 else: 7818 self.raise_error("No closing $ found") 7819 7820 heredoc_start = self._curr 7821 7822 while self._curr: 7823 if self._match_text_seq(*tags, advance=False): 7824 this = self._find_sql(heredoc_start, self._prev) 7825 self._advance(len(tags)) 7826 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7827 7828 self._advance() 7829 7830 self.raise_error(f"No closing {''.join(tags)} found") 7831 return None 7832 7833 def _find_parser( 7834 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7835 ) -> t.Optional[t.Callable]: 7836 if not self._curr: 7837 return None 7838 7839 index = self._index 7840 this = [] 7841 while True: 7842 # The current token might be multiple words 7843 curr = self._curr.text.upper() 7844 key = curr.split(" ") 7845 this.append(curr) 7846 7847 self._advance() 7848 result, trie = in_trie(trie, key) 7849 if result == TrieResult.FAILED: 7850 break 7851 7852 if result == TrieResult.EXISTS: 7853 subparser = parsers[" ".join(this)] 7854 return subparser 7855 7856 self._retreat(index) 7857 return None 7858 7859 def 
_match(self, token_type, advance=True, expression=None): 7860 if not self._curr: 7861 return None 7862 7863 if self._curr.token_type == token_type: 7864 if advance: 7865 self._advance() 7866 self._add_comments(expression) 7867 return True 7868 7869 return None 7870 7871 def _match_set(self, types, advance=True): 7872 if not self._curr: 7873 return None 7874 7875 if self._curr.token_type in types: 7876 if advance: 7877 self._advance() 7878 return True 7879 7880 return None 7881 7882 def _match_pair(self, token_type_a, token_type_b, advance=True): 7883 if not self._curr or not self._next: 7884 return None 7885 7886 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7887 if advance: 7888 self._advance(2) 7889 return True 7890 7891 return None 7892 7893 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7894 if not self._match(TokenType.L_PAREN, expression=expression): 7895 self.raise_error("Expecting (") 7896 7897 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7898 if not self._match(TokenType.R_PAREN, expression=expression): 7899 self.raise_error("Expecting )") 7900 7901 def _match_texts(self, texts, advance=True): 7902 if ( 7903 self._curr 7904 and self._curr.token_type != TokenType.STRING 7905 and self._curr.text.upper() in texts 7906 ): 7907 if advance: 7908 self._advance() 7909 return True 7910 return None 7911 7912 def _match_text_seq(self, *texts, advance=True): 7913 index = self._index 7914 for text in texts: 7915 if ( 7916 self._curr 7917 and self._curr.token_type != TokenType.STRING 7918 and self._curr.text.upper() == text 7919 ): 7920 self._advance() 7921 else: 7922 self._retreat(index) 7923 return None 7924 7925 if not advance: 7926 self._retreat(index) 7927 7928 return True 7929 7930 def _replace_lambda( 7931 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7932 ) -> t.Optional[exp.Expression]: 7933 if not node: 7934 return node 7935 
7936 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7937 7938 for column in node.find_all(exp.Column): 7939 typ = lambda_types.get(column.parts[0].name) 7940 if typ is not None: 7941 dot_or_id = column.to_dot() if column.table else column.this 7942 7943 if typ: 7944 dot_or_id = self.expression( 7945 exp.Cast, 7946 this=dot_or_id, 7947 to=typ, 7948 ) 7949 7950 parent = column.parent 7951 7952 while isinstance(parent, exp.Dot): 7953 if not isinstance(parent.parent, exp.Dot): 7954 parent.replace(dot_or_id) 7955 break 7956 parent = parent.parent 7957 else: 7958 if column is node: 7959 node = dot_or_id 7960 else: 7961 column.replace(dot_or_id) 7962 return node 7963 7964 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7965 start = self._prev 7966 7967 # Not to be confused with TRUNCATE(number, decimals) function call 7968 if self._match(TokenType.L_PAREN): 7969 self._retreat(self._index - 2) 7970 return self._parse_function() 7971 7972 # Clickhouse supports TRUNCATE DATABASE as well 7973 is_database = self._match(TokenType.DATABASE) 7974 7975 self._match(TokenType.TABLE) 7976 7977 exists = self._parse_exists(not_=False) 7978 7979 expressions = self._parse_csv( 7980 lambda: self._parse_table(schema=True, is_db_reference=is_database) 7981 ) 7982 7983 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7984 7985 if self._match_text_seq("RESTART", "IDENTITY"): 7986 identity = "RESTART" 7987 elif self._match_text_seq("CONTINUE", "IDENTITY"): 7988 identity = "CONTINUE" 7989 else: 7990 identity = None 7991 7992 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7993 option = self._prev.text 7994 else: 7995 option = None 7996 7997 partition = self._parse_partition() 7998 7999 # Fallback case 8000 if self._curr: 8001 return self._parse_as_command(start) 8002 8003 return self.expression( 8004 exp.TruncateTable, 8005 expressions=expressions, 8006 is_database=is_database, 8007 
exists=exists, 8008 cluster=cluster, 8009 identity=identity, 8010 option=option, 8011 partition=partition, 8012 ) 8013 8014 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8015 this = self._parse_ordered(self._parse_opclass) 8016 8017 if not self._match(TokenType.WITH): 8018 return this 8019 8020 op = self._parse_var(any_token=True) 8021 8022 return self.expression(exp.WithOperator, this=this, op=op) 8023 8024 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8025 self._match(TokenType.EQ) 8026 self._match(TokenType.L_PAREN) 8027 8028 opts: t.List[t.Optional[exp.Expression]] = [] 8029 option: exp.Expression | None 8030 while self._curr and not self._match(TokenType.R_PAREN): 8031 if self._match_text_seq("FORMAT_NAME", "="): 8032 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8033 option = self._parse_format_name() 8034 else: 8035 option = self._parse_property() 8036 8037 if option is None: 8038 self.raise_error("Unable to parse option") 8039 break 8040 8041 opts.append(option) 8042 8043 return opts 8044 8045 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8046 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8047 8048 options = [] 8049 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8050 option = self._parse_var(any_token=True) 8051 prev = self._prev.text.upper() 8052 8053 # Different dialects might separate options and values by white space, "=" and "AS" 8054 self._match(TokenType.EQ) 8055 self._match(TokenType.ALIAS) 8056 8057 param = self.expression(exp.CopyParameter, this=option) 8058 8059 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8060 TokenType.L_PAREN, advance=False 8061 ): 8062 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8063 param.set("expressions", self._parse_wrapped_options()) 8064 elif prev == "FILE_FORMAT": 8065 # T-SQL's external file format case 8066 param.set("expression", self._parse_field()) 8067 
else: 8068 param.set("expression", self._parse_unquoted_field()) 8069 8070 options.append(param) 8071 self._match(sep) 8072 8073 return options 8074 8075 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8076 expr = self.expression(exp.Credentials) 8077 8078 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8079 expr.set("storage", self._parse_field()) 8080 if self._match_text_seq("CREDENTIALS"): 8081 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8082 creds = ( 8083 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8084 ) 8085 expr.set("credentials", creds) 8086 if self._match_text_seq("ENCRYPTION"): 8087 expr.set("encryption", self._parse_wrapped_options()) 8088 if self._match_text_seq("IAM_ROLE"): 8089 expr.set("iam_role", self._parse_field()) 8090 if self._match_text_seq("REGION"): 8091 expr.set("region", self._parse_field()) 8092 8093 return expr 8094 8095 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8096 return self._parse_field() 8097 8098 def _parse_copy(self) -> exp.Copy | exp.Command: 8099 start = self._prev 8100 8101 self._match(TokenType.INTO) 8102 8103 this = ( 8104 self._parse_select(nested=True, parse_subquery_alias=False) 8105 if self._match(TokenType.L_PAREN, advance=False) 8106 else self._parse_table(schema=True) 8107 ) 8108 8109 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8110 8111 files = self._parse_csv(self._parse_file_location) 8112 credentials = self._parse_credentials() 8113 8114 self._match_text_seq("WITH") 8115 8116 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8117 8118 # Fallback case 8119 if self._curr: 8120 return self._parse_as_command(start) 8121 8122 return self.expression( 8123 exp.Copy, 8124 this=this, 8125 kind=kind, 8126 credentials=credentials, 8127 files=files, 8128 params=params, 8129 ) 8130 8131 def _parse_normalize(self) -> exp.Normalize: 8132 return self.expression( 8133 
exp.Normalize, 8134 this=self._parse_bitwise(), 8135 form=self._match(TokenType.COMMA) and self._parse_var(), 8136 ) 8137 8138 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8139 args = self._parse_csv(lambda: self._parse_lambda()) 8140 8141 this = seq_get(args, 0) 8142 decimals = seq_get(args, 1) 8143 8144 return expr_type( 8145 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8146 ) 8147 8148 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8149 if self._match_text_seq("COLUMNS", "(", advance=False): 8150 this = self._parse_function() 8151 if isinstance(this, exp.Columns): 8152 this.set("unpack", True) 8153 return this 8154 8155 return self.expression( 8156 exp.Star, 8157 **{ # type: ignore 8158 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8159 "replace": self._parse_star_op("REPLACE"), 8160 "rename": self._parse_star_op("RENAME"), 8161 }, 8162 ) 8163 8164 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8165 privilege_parts = [] 8166 8167 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8168 # (end of privilege list) or L_PAREN (start of column list) are met 8169 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8170 privilege_parts.append(self._curr.text.upper()) 8171 self._advance() 8172 8173 this = exp.var(" ".join(privilege_parts)) 8174 expressions = ( 8175 self._parse_wrapped_csv(self._parse_column) 8176 if self._match(TokenType.L_PAREN, advance=False) 8177 else None 8178 ) 8179 8180 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8181 8182 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8183 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8184 principal = self._parse_id_var() 8185 8186 if not principal: 8187 return None 8188 8189 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8190 8191 def 
_parse_grant(self) -> exp.Grant | exp.Command: 8192 start = self._prev 8193 8194 privileges = self._parse_csv(self._parse_grant_privilege) 8195 8196 self._match(TokenType.ON) 8197 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8198 8199 # Attempt to parse the securable e.g. MySQL allows names 8200 # such as "foo.*", "*.*" which are not easily parseable yet 8201 securable = self._try_parse(self._parse_table_parts) 8202 8203 if not securable or not self._match_text_seq("TO"): 8204 return self._parse_as_command(start) 8205 8206 principals = self._parse_csv(self._parse_grant_principal) 8207 8208 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8209 8210 if self._curr: 8211 return self._parse_as_command(start) 8212 8213 return self.expression( 8214 exp.Grant, 8215 privileges=privileges, 8216 kind=kind, 8217 securable=securable, 8218 principals=principals, 8219 grant_option=grant_option, 8220 ) 8221 8222 def _parse_overlay(self) -> exp.Overlay: 8223 return self.expression( 8224 exp.Overlay, 8225 **{ # type: ignore 8226 "this": self._parse_bitwise(), 8227 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8228 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8229 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8230 }, 8231 ) 8232 8233 def _parse_format_name(self) -> exp.Property: 8234 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8235 # for FILE_FORMAT = <format_name> 8236 return self.expression( 8237 exp.Property, 8238 this=exp.var("FORMAT_NAME"), 8239 value=self._parse_string() or self._parse_table_parts(), 8240 ) 8241 8242 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8243 args: t.List[exp.Expression] = [] 8244 8245 if self._match(TokenType.DISTINCT): 8246 args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()])) 8247 self._match(TokenType.COMMA) 8248 8249 
args.extend(self._parse_csv(self._parse_assignment)) 8250 8251 return self.expression( 8252 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8253 ) 8254 8255 def _identifier_expression( 8256 self, token: t.Optional[Token] = None, **kwargs: t.Any 8257 ) -> exp.Identifier: 8258 token = token or self._prev 8259 expression = self.expression(exp.Identifier, this=token.text, **kwargs) 8260 expression.update_positions(token) 8261 return expression
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
def __init__(
    self,
    error_level: t.Optional[ErrorLevel] = None,
    error_message_context: int = 100,
    max_errors: int = 3,
    dialect: DialectType = None,
):
    """
    Store the parser settings and put the parser into its initial state.

    Args:
        error_level: The desired error level. A falsy value selects
            ErrorLevel.IMMEDIATE.
        error_message_context: Number of characters of the query string captured
            on each side of the offending token when an error message is built.
        max_errors: Upper bound on the number of messages folded into a raised
            ParseError (passed to concat_messages by check_errors).
        dialect: Anything accepted by Dialect.get_or_raise; the resolved value is
            stored on self.dialect.
    """
    # Function-scope import, kept as in the original listing.
    from sqlglot.dialects import Dialect

    if error_level:
        self.error_level = error_level
    else:
        self.error_level = ErrorLevel.IMMEDIATE

    self.error_message_context = error_message_context
    self.max_errors = max_errors

    resolved_dialect = Dialect.get_or_raise(dialect)
    self.dialect = resolved_dialect

    self.reset()
def parse(
    self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parse a token list into syntax trees, one tree per parsed SQL statement.

    Args:
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        Whatever self._parse returns when driven by the class-level
        _parse_statement method: the list of produced syntax trees.
    """
    # Looked up on the class so _parse receives the plain (unbound) function.
    statement_parser = self.__class__._parse_statement
    return self._parse(parse_method=statement_parser, raw_tokens=raw_tokens, sql=sql)
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
def parse_into(
    self,
    expression_types: exp.IntoType,
    raw_tokens: t.List[Token],
    sql: t.Optional[str] = None,
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parse a token list into a requested Expression type.

    When several types are given, each one is tried in order and the result of
    the first successful parse is returned.

    Args:
        expression_types: The expression type(s) to try and parse the token list into.
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The result of self._parse for the first type that parses without a ParseError.

    Raises:
        TypeError: If a requested type has no entry in EXPRESSION_PARSERS.
        ParseError: If every requested type failed; carries the merged errors of
            all attempts and is chained from the last failure.
    """
    failures = []

    for candidate in ensure_list(expression_types):
        candidate_parser = self.EXPRESSION_PARSERS.get(candidate)
        if not candidate_parser:
            raise TypeError(f"No parser registered for {candidate}")

        try:
            return self._parse(candidate_parser, raw_tokens, sql)
        except ParseError as failure:
            # Tag the first recorded error with the type that was being attempted.
            failure.errors[0]["into_expression"] = candidate
            failures.append(failure)

    raise ParseError(
        f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
        errors=merge_errors(failures),
    ) from failures[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of syntax trees produced for the first expression type that parses successfully.
def check_errors(self) -> None:
    """
    Log or raise the recorded errors, depending on the configured error level.

    With ErrorLevel.WARN every recorded error is logged through logger.error.
    With ErrorLevel.RAISE, and at least one recorded error, a single ParseError
    is raised whose message is built by concat_messages (bounded by max_errors).
    Any other level does nothing here.
    """
    level = self.error_level

    if level == ErrorLevel.WARN:
        for recorded in self.errors:
            logger.error(str(recorded))
        return

    if level == ErrorLevel.RAISE and self.errors:
        summary = concat_messages(self.errors, self.max_errors)
        raise ParseError(summary, errors=merge_errors(self.errors))
Logs or raises any found errors, depending on the chosen error level setting.
def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
    """
    Record an error, or raise it right away, depending on the error level.

    Args:
        message: Human-readable description of the problem.
        token: Token the error is anchored on. Falls back to self._curr, then
            self._prev, then an empty string token.

    Raises:
        ParseError: Immediately, when error_level is ErrorLevel.IMMEDIATE.
            Otherwise the error is appended to self.errors.
    """
    anchor = token or self._curr or self._prev or Token.string("")
    window = self.error_message_context

    first = anchor.start
    stop = anchor.end + 1

    # Slice the query into: text before the token, the token itself, text after it.
    start_context = self.sql[max(first - window, 0) : first]
    highlight = self.sql[first:stop]
    end_context = self.sql[stop : stop + window]

    # NOTE(review): reconstructed from a listing that collapses whitespace; confirm
    # the number of leading spaces inside the second literal against the real file.
    rendered = (
        f"{message}. Line {anchor.line}, Col: {anchor.col}.\n"
        f" {start_context}\033[4m{highlight}\033[0m{end_context}"
    )

    error = ParseError.new(
        rendered,
        description=message,
        line=anchor.line,
        col=anchor.col,
        start_context=start_context,
        highlight=highlight,
        end_context=end_context,
    )

    if self.error_level != ErrorLevel.IMMEDIATE:
        self.errors.append(error)
        return

    raise error
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
def expression(
    self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
) -> E:
    """
    Builds an expression of the given class, attaches comments to it and validates it.

    Args:
        exp_class: The expression class to instantiate.
        comments: An optional list of comments to attach to the expression. When it is
            missing or empty, `_add_comments` is called on the new expression instead.
        kwargs: The arguments to set for the expression along with their respective values.

    Returns:
        The new expression, as returned by `validate_expression`.
    """
    node = exp_class(**kwargs)

    if comments:
        node.add_comments(comments)
    else:
        self._add_comments(node)

    return self.validate_expression(node)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
    """
    Reports every validation problem of an Expression through `raise_error`.

    Nothing is checked when the error level is `ErrorLevel.IGNORE`.

    Args:
        expression: The expression to validate.
        args: An optional list of items that was used to instantiate the expression, if it's a Func.

    Returns:
        The same expression that was passed in.
    """
    if self.error_level == ErrorLevel.IGNORE:
        return expression

    for problem in expression.error_messages(args):
        self.raise_error(problem)

    return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
def parse_set_operation(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    """
    Parses a single set operation whose left-hand side is `this`.

    Optional join-style side/kind modifiers are read first, then the set operation token
    itself. If no set operation token follows, the parser is rewound to where it started
    and nothing is consumed.

    Args:
        this: The already parsed left-hand side of the set operation.

    Returns:
        The set operation expression (Union, Except or Intersect), or None if the
        upcoming tokens do not form a set operation.
    """
    checkpoint = self._index
    _, side_token, kind_token = self._parse_join_parts()

    if not self._match_set(self.SET_OPERATIONS):
        # Not a set operation: give back whatever the join-part lookahead consumed.
        self._retreat(checkpoint)
        return None

    # Keep a handle on the operator token, since later matches move `_prev` forward.
    op_token = self._prev
    op_type = op_token.token_type
    comments = op_token.comments

    side = side_token.text if side_token else None
    kind = kind_token.text if kind_token else None

    # Anything that is neither UNION nor EXCEPT is treated as INTERSECT.
    operation: t.Type[exp.SetOperation] = (
        exp.Union
        if op_type == TokenType.UNION
        else exp.Except
        if op_type == TokenType.EXCEPT
        else exp.Intersect
    )

    distinct: t.Optional[bool]
    if self._match(TokenType.DISTINCT):
        distinct = True
    elif self._match(TokenType.ALL):
        distinct = False
    else:
        # No explicit quantifier: fall back to the dialect's default for this operation.
        distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
        if distinct is None:
            self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

    by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq(
        "STRICT", "CORRESPONDING"
    )
    if self._match_text_seq("CORRESPONDING"):
        by_name = True
        # A bare CORRESPONDING without side/kind modifiers defaults the kind to INNER.
        if not side and not kind:
            kind = "INNER"

    on_column_list = (
        self._parse_wrapped_csv(self._parse_column)
        if by_name and self._match_texts(("ON", "BY"))
        else None
    )

    right_hand_side = self._parse_select(nested=True, parse_set_operation=False)

    return self.expression(
        operation,
        comments=comments,
        this=this,
        distinct=distinct,
        by_name=by_name,
        expression=right_hand_side,
        side=side,
        kind=kind,
        on=on_column_list,
    )