sqlglot.parser
from __future__ import annotations

import logging
import typing as t
import itertools
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")
    TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor)

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a map expression from a flat argument list.

    Args:
        args: Either a single star expression, or alternating keys and values
            (`key0, value0, key1, value1, ...`).

    Returns:
        An `exp.StarMap` wrapping the star when that is the only argument, otherwise an
        `exp.VarMap` whose `keys` and `values` are arrays built from the even- and
        odd-indexed arguments respectively.

    Raises:
        IndexError: If a non-star argument list has an odd length, because the last key
            has no matching value.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    """Build a LIKE expression from function-call style arguments.

    The operands are swapped relative to the call: the second argument becomes `this`
    and the first becomes `expression`.

    Args:
        args: Two operands, optionally followed by an escape expression.

    Returns:
        The `exp.Like` node, wrapped in `exp.Escape` when more than two arguments are given.
    """
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    """Create a parser callback for a binary range operator.

    Args:
        expr_type: The expression class to instantiate for the operator.
        reverse_args: Whether to swap the left-hand operand with the parsed operand
            before building the expression.

    Returns:
        A callable taking the parser and the already-parsed left-hand operand. It parses
        the other operand with `_parse_bitwise`, builds `expr_type`, and passes the result
        through `_parse_escape`.
    """

    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    """Build a logarithm expression, honoring the dialect's argument conventions.

    Args:
        args: One or two arguments of the LOG call.
        dialect: The dialect whose `LOG_BASE_FIRST` setting (two-argument form) and
            parser-level `LOG_DEFAULTS_TO_LN` setting (one-argument form) are consulted.

    Returns:
        An `exp.Log` for the two-argument form. For the one-argument form, an `exp.Ln`
        when the dialect's parser sets `LOG_DEFAULTS_TO_LN`, otherwise an `exp.Log`.
    """
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    """Build `exp.LowerHex` when the dialect sets `HEX_LOWERCASE`, otherwise `exp.Hex`."""
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    """Build `exp.Lower`, collapsing LOWER(HEX(x)) into `exp.LowerHex(x)`."""
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    """Build `exp.Upper`, collapsing UPPER(HEX(x)) into `exp.Hex(x)`."""
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    """Create a builder for JSON extraction functions that take a path argument.

    Args:
        expr_type: The expression class to instantiate.

    Returns:
        A builder that uses the first argument as `this` and converts the second argument
        with `dialect.to_json_path`. Any further arguments are stored under `expressions`,
        but only when `expr_type` is `exp.JSONExtract`.
    """

    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g.
MOD(a + 1, 7) -> (a + 1) % 7 107 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 108 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 109 110 return exp.Mod(this=this, expression=expression) 111 112 113def build_pad(args: t.List, is_left: bool = True): 114 return exp.Pad( 115 this=seq_get(args, 0), 116 expression=seq_get(args, 1), 117 fill_pattern=seq_get(args, 2), 118 is_left=is_left, 119 ) 120 121 122def build_array_constructor( 123 exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect 124) -> exp.Expression: 125 array_exp = exp_class(expressions=args) 126 127 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 128 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 129 130 return array_exp 131 132 133def build_convert_timezone( 134 args: t.List, default_source_tz: t.Optional[str] = None 135) -> t.Union[exp.ConvertTimezone, exp.Anonymous]: 136 if len(args) == 2: 137 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 138 return exp.ConvertTimezone( 139 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 140 ) 141 142 return exp.ConvertTimezone.from_arg_list(args) 143 144 145def build_trim(args: t.List, is_left: bool = True): 146 return exp.Trim( 147 this=seq_get(args, 0), 148 expression=seq_get(args, 1), 149 position="LEADING" if is_left else "TRAILING", 150 ) 151 152 153def build_coalesce(args: t.List, is_nvl: t.Optional[bool] = None) -> exp.Coalesce: 154 return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl) 155 156 157def build_locate_strposition(args: t.List): 158 return exp.StrPosition( 159 this=seq_get(args, 1), 160 substr=seq_get(args, 0), 161 position=seq_get(args, 2), 162 ) 163 164 165class _Parser(type): 166 def __new__(cls, clsname, bases, attrs): 167 klass = super().__new__(cls, clsname, bases, attrs) 168 169 klass.SHOW_TRIE = new_trie(key.split(" 
") for key in klass.SHOW_PARSERS) 170 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 171 172 return klass 173 174 175class Parser(metaclass=_Parser): 176 """ 177 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 178 179 Args: 180 error_level: The desired error level. 181 Default: ErrorLevel.IMMEDIATE 182 error_message_context: The amount of context to capture from a query string when displaying 183 the error message (in number of characters). 184 Default: 100 185 max_errors: Maximum number of error messages to include in a raised ParseError. 186 This is only relevant if error_level is ErrorLevel.RAISE. 187 Default: 3 188 """ 189 190 FUNCTIONS: t.Dict[str, t.Callable] = { 191 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 192 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 193 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 194 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 195 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 196 ), 197 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 198 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 199 ), 200 "CHAR": lambda args: exp.Chr(expressions=args), 201 "CHR": lambda args: exp.Chr(expressions=args), 202 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 203 "CONCAT": lambda args, dialect: exp.Concat( 204 expressions=args, 205 safe=not dialect.STRICT_STRING_CONCAT, 206 coalesce=dialect.CONCAT_COALESCE, 207 ), 208 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 209 expressions=args, 210 safe=not dialect.STRICT_STRING_CONCAT, 211 coalesce=dialect.CONCAT_COALESCE, 212 ), 213 "CONVERT_TIMEZONE": build_convert_timezone, 214 "DATE_TO_DATE_STR": lambda args: exp.Cast( 215 this=seq_get(args, 0), 216 to=exp.DataType(this=exp.DataType.Type.TEXT), 217 ), 218 "GENERATE_DATE_ARRAY": lambda 
args: exp.GenerateDateArray( 219 start=seq_get(args, 0), 220 end=seq_get(args, 1), 221 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")), 222 ), 223 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 224 "HEX": build_hex, 225 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 226 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 227 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 228 "LIKE": build_like, 229 "LOG": build_logarithm, 230 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 231 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 232 "LOWER": build_lower, 233 "LPAD": lambda args: build_pad(args), 234 "LEFTPAD": lambda args: build_pad(args), 235 "LTRIM": lambda args: build_trim(args), 236 "MOD": build_mod, 237 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 238 "RPAD": lambda args: build_pad(args, is_left=False), 239 "RTRIM": lambda args: build_trim(args, is_left=False), 240 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 241 if len(args) != 2 242 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 243 "STRPOS": exp.StrPosition.from_arg_list, 244 "CHARINDEX": lambda args: build_locate_strposition(args), 245 "INSTR": exp.StrPosition.from_arg_list, 246 "LOCATE": lambda args: build_locate_strposition(args), 247 "TIME_TO_TIME_STR": lambda args: exp.Cast( 248 this=seq_get(args, 0), 249 to=exp.DataType(this=exp.DataType.Type.TEXT), 250 ), 251 "TO_HEX": build_hex, 252 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 253 this=exp.Cast( 254 this=seq_get(args, 0), 255 to=exp.DataType(this=exp.DataType.Type.TEXT), 256 ), 257 start=exp.Literal.number(1), 258 length=exp.Literal.number(10), 259 ), 260 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 261 "UPPER": 
build_upper, 262 "VAR_MAP": build_var_map, 263 } 264 265 NO_PAREN_FUNCTIONS = { 266 TokenType.CURRENT_DATE: exp.CurrentDate, 267 TokenType.CURRENT_DATETIME: exp.CurrentDate, 268 TokenType.CURRENT_TIME: exp.CurrentTime, 269 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 270 TokenType.CURRENT_USER: exp.CurrentUser, 271 } 272 273 STRUCT_TYPE_TOKENS = { 274 TokenType.NESTED, 275 TokenType.OBJECT, 276 TokenType.STRUCT, 277 TokenType.UNION, 278 } 279 280 NESTED_TYPE_TOKENS = { 281 TokenType.ARRAY, 282 TokenType.LIST, 283 TokenType.LOWCARDINALITY, 284 TokenType.MAP, 285 TokenType.NULLABLE, 286 TokenType.RANGE, 287 *STRUCT_TYPE_TOKENS, 288 } 289 290 ENUM_TYPE_TOKENS = { 291 TokenType.DYNAMIC, 292 TokenType.ENUM, 293 TokenType.ENUM8, 294 TokenType.ENUM16, 295 } 296 297 AGGREGATE_TYPE_TOKENS = { 298 TokenType.AGGREGATEFUNCTION, 299 TokenType.SIMPLEAGGREGATEFUNCTION, 300 } 301 302 TYPE_TOKENS = { 303 TokenType.BIT, 304 TokenType.BOOLEAN, 305 TokenType.TINYINT, 306 TokenType.UTINYINT, 307 TokenType.SMALLINT, 308 TokenType.USMALLINT, 309 TokenType.INT, 310 TokenType.UINT, 311 TokenType.BIGINT, 312 TokenType.UBIGINT, 313 TokenType.INT128, 314 TokenType.UINT128, 315 TokenType.INT256, 316 TokenType.UINT256, 317 TokenType.MEDIUMINT, 318 TokenType.UMEDIUMINT, 319 TokenType.FIXEDSTRING, 320 TokenType.FLOAT, 321 TokenType.DOUBLE, 322 TokenType.UDOUBLE, 323 TokenType.CHAR, 324 TokenType.NCHAR, 325 TokenType.VARCHAR, 326 TokenType.NVARCHAR, 327 TokenType.BPCHAR, 328 TokenType.TEXT, 329 TokenType.MEDIUMTEXT, 330 TokenType.LONGTEXT, 331 TokenType.BLOB, 332 TokenType.MEDIUMBLOB, 333 TokenType.LONGBLOB, 334 TokenType.BINARY, 335 TokenType.VARBINARY, 336 TokenType.JSON, 337 TokenType.JSONB, 338 TokenType.INTERVAL, 339 TokenType.TINYBLOB, 340 TokenType.TINYTEXT, 341 TokenType.TIME, 342 TokenType.TIMETZ, 343 TokenType.TIMESTAMP, 344 TokenType.TIMESTAMP_S, 345 TokenType.TIMESTAMP_MS, 346 TokenType.TIMESTAMP_NS, 347 TokenType.TIMESTAMPTZ, 348 TokenType.TIMESTAMPLTZ, 349 
TokenType.TIMESTAMPNTZ, 350 TokenType.DATETIME, 351 TokenType.DATETIME2, 352 TokenType.DATETIME64, 353 TokenType.SMALLDATETIME, 354 TokenType.DATE, 355 TokenType.DATE32, 356 TokenType.INT4RANGE, 357 TokenType.INT4MULTIRANGE, 358 TokenType.INT8RANGE, 359 TokenType.INT8MULTIRANGE, 360 TokenType.NUMRANGE, 361 TokenType.NUMMULTIRANGE, 362 TokenType.TSRANGE, 363 TokenType.TSMULTIRANGE, 364 TokenType.TSTZRANGE, 365 TokenType.TSTZMULTIRANGE, 366 TokenType.DATERANGE, 367 TokenType.DATEMULTIRANGE, 368 TokenType.DECIMAL, 369 TokenType.DECIMAL32, 370 TokenType.DECIMAL64, 371 TokenType.DECIMAL128, 372 TokenType.DECIMAL256, 373 TokenType.UDECIMAL, 374 TokenType.BIGDECIMAL, 375 TokenType.UUID, 376 TokenType.GEOGRAPHY, 377 TokenType.GEOMETRY, 378 TokenType.POINT, 379 TokenType.RING, 380 TokenType.LINESTRING, 381 TokenType.MULTILINESTRING, 382 TokenType.POLYGON, 383 TokenType.MULTIPOLYGON, 384 TokenType.HLLSKETCH, 385 TokenType.HSTORE, 386 TokenType.PSEUDO_TYPE, 387 TokenType.SUPER, 388 TokenType.SERIAL, 389 TokenType.SMALLSERIAL, 390 TokenType.BIGSERIAL, 391 TokenType.XML, 392 TokenType.YEAR, 393 TokenType.USERDEFINED, 394 TokenType.MONEY, 395 TokenType.SMALLMONEY, 396 TokenType.ROWVERSION, 397 TokenType.IMAGE, 398 TokenType.VARIANT, 399 TokenType.VECTOR, 400 TokenType.OBJECT, 401 TokenType.OBJECT_IDENTIFIER, 402 TokenType.INET, 403 TokenType.IPADDRESS, 404 TokenType.IPPREFIX, 405 TokenType.IPV4, 406 TokenType.IPV6, 407 TokenType.UNKNOWN, 408 TokenType.NULL, 409 TokenType.NAME, 410 TokenType.TDIGEST, 411 TokenType.DYNAMIC, 412 *ENUM_TYPE_TOKENS, 413 *NESTED_TYPE_TOKENS, 414 *AGGREGATE_TYPE_TOKENS, 415 } 416 417 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 418 TokenType.BIGINT: TokenType.UBIGINT, 419 TokenType.INT: TokenType.UINT, 420 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 421 TokenType.SMALLINT: TokenType.USMALLINT, 422 TokenType.TINYINT: TokenType.UTINYINT, 423 TokenType.DECIMAL: TokenType.UDECIMAL, 424 TokenType.DOUBLE: TokenType.UDOUBLE, 425 } 426 427 SUBQUERY_PREDICATES = { 428 
TokenType.ANY: exp.Any, 429 TokenType.ALL: exp.All, 430 TokenType.EXISTS: exp.Exists, 431 TokenType.SOME: exp.Any, 432 } 433 434 RESERVED_TOKENS = { 435 *Tokenizer.SINGLE_TOKENS.values(), 436 TokenType.SELECT, 437 } - {TokenType.IDENTIFIER} 438 439 DB_CREATABLES = { 440 TokenType.DATABASE, 441 TokenType.DICTIONARY, 442 TokenType.FILE_FORMAT, 443 TokenType.MODEL, 444 TokenType.NAMESPACE, 445 TokenType.SCHEMA, 446 TokenType.SEQUENCE, 447 TokenType.SINK, 448 TokenType.SOURCE, 449 TokenType.STAGE, 450 TokenType.STORAGE_INTEGRATION, 451 TokenType.STREAMLIT, 452 TokenType.TABLE, 453 TokenType.TAG, 454 TokenType.VIEW, 455 TokenType.WAREHOUSE, 456 } 457 458 CREATABLES = { 459 TokenType.COLUMN, 460 TokenType.CONSTRAINT, 461 TokenType.FOREIGN_KEY, 462 TokenType.FUNCTION, 463 TokenType.INDEX, 464 TokenType.PROCEDURE, 465 *DB_CREATABLES, 466 } 467 468 ALTERABLES = { 469 TokenType.INDEX, 470 TokenType.TABLE, 471 TokenType.VIEW, 472 } 473 474 # Tokens that can represent identifiers 475 ID_VAR_TOKENS = { 476 TokenType.ALL, 477 TokenType.ATTACH, 478 TokenType.VAR, 479 TokenType.ANTI, 480 TokenType.APPLY, 481 TokenType.ASC, 482 TokenType.ASOF, 483 TokenType.AUTO_INCREMENT, 484 TokenType.BEGIN, 485 TokenType.BPCHAR, 486 TokenType.CACHE, 487 TokenType.CASE, 488 TokenType.COLLATE, 489 TokenType.COMMAND, 490 TokenType.COMMENT, 491 TokenType.COMMIT, 492 TokenType.CONSTRAINT, 493 TokenType.COPY, 494 TokenType.CUBE, 495 TokenType.CURRENT_SCHEMA, 496 TokenType.DEFAULT, 497 TokenType.DELETE, 498 TokenType.DESC, 499 TokenType.DESCRIBE, 500 TokenType.DETACH, 501 TokenType.DICTIONARY, 502 TokenType.DIV, 503 TokenType.END, 504 TokenType.EXECUTE, 505 TokenType.EXPORT, 506 TokenType.ESCAPE, 507 TokenType.FALSE, 508 TokenType.FIRST, 509 TokenType.FILTER, 510 TokenType.FINAL, 511 TokenType.FORMAT, 512 TokenType.FULL, 513 TokenType.IDENTIFIER, 514 TokenType.IS, 515 TokenType.ISNULL, 516 TokenType.INTERVAL, 517 TokenType.KEEP, 518 TokenType.KILL, 519 TokenType.LEFT, 520 TokenType.LIMIT, 521 
TokenType.LOAD, 522 TokenType.MERGE, 523 TokenType.NATURAL, 524 TokenType.NEXT, 525 TokenType.OFFSET, 526 TokenType.OPERATOR, 527 TokenType.ORDINALITY, 528 TokenType.OVERLAPS, 529 TokenType.OVERWRITE, 530 TokenType.PARTITION, 531 TokenType.PERCENT, 532 TokenType.PIVOT, 533 TokenType.PRAGMA, 534 TokenType.PUT, 535 TokenType.RANGE, 536 TokenType.RECURSIVE, 537 TokenType.REFERENCES, 538 TokenType.REFRESH, 539 TokenType.RENAME, 540 TokenType.REPLACE, 541 TokenType.RIGHT, 542 TokenType.ROLLUP, 543 TokenType.ROW, 544 TokenType.ROWS, 545 TokenType.SEMI, 546 TokenType.SET, 547 TokenType.SETTINGS, 548 TokenType.SHOW, 549 TokenType.TEMPORARY, 550 TokenType.TOP, 551 TokenType.TRUE, 552 TokenType.TRUNCATE, 553 TokenType.UNIQUE, 554 TokenType.UNNEST, 555 TokenType.UNPIVOT, 556 TokenType.UPDATE, 557 TokenType.USE, 558 TokenType.VOLATILE, 559 TokenType.WINDOW, 560 *CREATABLES, 561 *SUBQUERY_PREDICATES, 562 *TYPE_TOKENS, 563 *NO_PAREN_FUNCTIONS, 564 } 565 ID_VAR_TOKENS.remove(TokenType.UNION) 566 567 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 568 TokenType.ANTI, 569 TokenType.APPLY, 570 TokenType.ASOF, 571 TokenType.FULL, 572 TokenType.LEFT, 573 TokenType.LOCK, 574 TokenType.NATURAL, 575 TokenType.RIGHT, 576 TokenType.SEMI, 577 TokenType.WINDOW, 578 } 579 580 ALIAS_TOKENS = ID_VAR_TOKENS 581 582 ARRAY_CONSTRUCTORS = { 583 "ARRAY": exp.Array, 584 "LIST": exp.List, 585 } 586 587 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 588 589 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 590 591 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 592 593 FUNC_TOKENS = { 594 TokenType.COLLATE, 595 TokenType.COMMAND, 596 TokenType.CURRENT_DATE, 597 TokenType.CURRENT_DATETIME, 598 TokenType.CURRENT_SCHEMA, 599 TokenType.CURRENT_TIMESTAMP, 600 TokenType.CURRENT_TIME, 601 TokenType.CURRENT_USER, 602 TokenType.FILTER, 603 TokenType.FIRST, 604 TokenType.FORMAT, 605 TokenType.GLOB, 606 TokenType.IDENTIFIER, 607 TokenType.INDEX, 608 TokenType.ISNULL, 609 TokenType.ILIKE, 610 
TokenType.INSERT, 611 TokenType.LIKE, 612 TokenType.MERGE, 613 TokenType.NEXT, 614 TokenType.OFFSET, 615 TokenType.PRIMARY_KEY, 616 TokenType.RANGE, 617 TokenType.REPLACE, 618 TokenType.RLIKE, 619 TokenType.ROW, 620 TokenType.UNNEST, 621 TokenType.VAR, 622 TokenType.LEFT, 623 TokenType.RIGHT, 624 TokenType.SEQUENCE, 625 TokenType.DATE, 626 TokenType.DATETIME, 627 TokenType.TABLE, 628 TokenType.TIMESTAMP, 629 TokenType.TIMESTAMPTZ, 630 TokenType.TRUNCATE, 631 TokenType.WINDOW, 632 TokenType.XOR, 633 *TYPE_TOKENS, 634 *SUBQUERY_PREDICATES, 635 } 636 637 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 638 TokenType.AND: exp.And, 639 } 640 641 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 642 TokenType.COLON_EQ: exp.PropertyEQ, 643 } 644 645 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 646 TokenType.OR: exp.Or, 647 } 648 649 EQUALITY = { 650 TokenType.EQ: exp.EQ, 651 TokenType.NEQ: exp.NEQ, 652 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 653 } 654 655 COMPARISON = { 656 TokenType.GT: exp.GT, 657 TokenType.GTE: exp.GTE, 658 TokenType.LT: exp.LT, 659 TokenType.LTE: exp.LTE, 660 } 661 662 BITWISE = { 663 TokenType.AMP: exp.BitwiseAnd, 664 TokenType.CARET: exp.BitwiseXor, 665 TokenType.PIPE: exp.BitwiseOr, 666 } 667 668 TERM = { 669 TokenType.DASH: exp.Sub, 670 TokenType.PLUS: exp.Add, 671 TokenType.MOD: exp.Mod, 672 TokenType.COLLATE: exp.Collate, 673 } 674 675 FACTOR = { 676 TokenType.DIV: exp.IntDiv, 677 TokenType.LR_ARROW: exp.Distance, 678 TokenType.SLASH: exp.Div, 679 TokenType.STAR: exp.Mul, 680 } 681 682 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 683 684 TIMES = { 685 TokenType.TIME, 686 TokenType.TIMETZ, 687 } 688 689 TIMESTAMPS = { 690 TokenType.TIMESTAMP, 691 TokenType.TIMESTAMPNTZ, 692 TokenType.TIMESTAMPTZ, 693 TokenType.TIMESTAMPLTZ, 694 *TIMES, 695 } 696 697 SET_OPERATIONS = { 698 TokenType.UNION, 699 TokenType.INTERSECT, 700 TokenType.EXCEPT, 701 } 702 703 JOIN_METHODS = { 704 TokenType.ASOF, 705 
TokenType.NATURAL, 706 TokenType.POSITIONAL, 707 } 708 709 JOIN_SIDES = { 710 TokenType.LEFT, 711 TokenType.RIGHT, 712 TokenType.FULL, 713 } 714 715 JOIN_KINDS = { 716 TokenType.ANTI, 717 TokenType.CROSS, 718 TokenType.INNER, 719 TokenType.OUTER, 720 TokenType.SEMI, 721 TokenType.STRAIGHT_JOIN, 722 } 723 724 JOIN_HINTS: t.Set[str] = set() 725 726 LAMBDAS = { 727 TokenType.ARROW: lambda self, expressions: self.expression( 728 exp.Lambda, 729 this=self._replace_lambda( 730 self._parse_assignment(), 731 expressions, 732 ), 733 expressions=expressions, 734 ), 735 TokenType.FARROW: lambda self, expressions: self.expression( 736 exp.Kwarg, 737 this=exp.var(expressions[0].name), 738 expression=self._parse_assignment(), 739 ), 740 } 741 742 COLUMN_OPERATORS = { 743 TokenType.DOT: None, 744 TokenType.DOTCOLON: lambda self, this, to: self.expression( 745 exp.JSONCast, 746 this=this, 747 to=to, 748 ), 749 TokenType.DCOLON: lambda self, this, to: self.expression( 750 exp.Cast if self.STRICT_CAST else exp.TryCast, 751 this=this, 752 to=to, 753 ), 754 TokenType.ARROW: lambda self, this, path: self.expression( 755 exp.JSONExtract, 756 this=this, 757 expression=self.dialect.to_json_path(path), 758 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 759 ), 760 TokenType.DARROW: lambda self, this, path: self.expression( 761 exp.JSONExtractScalar, 762 this=this, 763 expression=self.dialect.to_json_path(path), 764 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 765 ), 766 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 767 exp.JSONBExtract, 768 this=this, 769 expression=path, 770 ), 771 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 772 exp.JSONBExtractScalar, 773 this=this, 774 expression=path, 775 ), 776 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 777 exp.JSONBContains, 778 this=this, 779 expression=key, 780 ), 781 } 782 783 EXPRESSION_PARSERS = { 784 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, 
TokenType.CLUSTER_BY), 785 exp.Column: lambda self: self._parse_column(), 786 exp.Condition: lambda self: self._parse_assignment(), 787 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 788 exp.Expression: lambda self: self._parse_expression(), 789 exp.From: lambda self: self._parse_from(joins=True), 790 exp.Group: lambda self: self._parse_group(), 791 exp.Having: lambda self: self._parse_having(), 792 exp.Hint: lambda self: self._parse_hint_body(), 793 exp.Identifier: lambda self: self._parse_id_var(), 794 exp.Join: lambda self: self._parse_join(), 795 exp.Lambda: lambda self: self._parse_lambda(), 796 exp.Lateral: lambda self: self._parse_lateral(), 797 exp.Limit: lambda self: self._parse_limit(), 798 exp.Offset: lambda self: self._parse_offset(), 799 exp.Order: lambda self: self._parse_order(), 800 exp.Ordered: lambda self: self._parse_ordered(), 801 exp.Properties: lambda self: self._parse_properties(), 802 exp.Qualify: lambda self: self._parse_qualify(), 803 exp.Returning: lambda self: self._parse_returning(), 804 exp.Select: lambda self: self._parse_select(), 805 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 806 exp.Table: lambda self: self._parse_table_parts(), 807 exp.TableAlias: lambda self: self._parse_table_alias(), 808 exp.Tuple: lambda self: self._parse_value(values=False), 809 exp.Whens: lambda self: self._parse_when_matched(), 810 exp.Where: lambda self: self._parse_where(), 811 exp.Window: lambda self: self._parse_named_window(), 812 exp.With: lambda self: self._parse_with(), 813 "JOIN_TYPE": lambda self: self._parse_join_parts(), 814 } 815 816 STATEMENT_PARSERS = { 817 TokenType.ALTER: lambda self: self._parse_alter(), 818 TokenType.ANALYZE: lambda self: self._parse_analyze(), 819 TokenType.BEGIN: lambda self: self._parse_transaction(), 820 TokenType.CACHE: lambda self: self._parse_cache(), 821 TokenType.COMMENT: lambda self: self._parse_comment(), 822 TokenType.COMMIT: lambda self: 
self._parse_commit_or_rollback(), 823 TokenType.COPY: lambda self: self._parse_copy(), 824 TokenType.CREATE: lambda self: self._parse_create(), 825 TokenType.DELETE: lambda self: self._parse_delete(), 826 TokenType.DESC: lambda self: self._parse_describe(), 827 TokenType.DESCRIBE: lambda self: self._parse_describe(), 828 TokenType.DROP: lambda self: self._parse_drop(), 829 TokenType.GRANT: lambda self: self._parse_grant(), 830 TokenType.INSERT: lambda self: self._parse_insert(), 831 TokenType.KILL: lambda self: self._parse_kill(), 832 TokenType.LOAD: lambda self: self._parse_load(), 833 TokenType.MERGE: lambda self: self._parse_merge(), 834 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 835 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 836 TokenType.REFRESH: lambda self: self._parse_refresh(), 837 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 838 TokenType.SET: lambda self: self._parse_set(), 839 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 840 TokenType.UNCACHE: lambda self: self._parse_uncache(), 841 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 842 TokenType.UPDATE: lambda self: self._parse_update(), 843 TokenType.USE: lambda self: self._parse_use(), 844 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 845 } 846 847 UNARY_PARSERS = { 848 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 849 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 850 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 851 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 852 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 853 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 854 } 855 856 STRING_PARSERS = { 857 TokenType.HEREDOC_STRING: lambda 
self, token: self.expression( 858 exp.RawString, this=token.text 859 ), 860 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 861 exp.National, this=token.text 862 ), 863 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 864 TokenType.STRING: lambda self, token: self.expression( 865 exp.Literal, this=token.text, is_string=True 866 ), 867 TokenType.UNICODE_STRING: lambda self, token: self.expression( 868 exp.UnicodeString, 869 this=token.text, 870 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 871 ), 872 } 873 874 NUMERIC_PARSERS = { 875 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 876 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 877 TokenType.HEX_STRING: lambda self, token: self.expression( 878 exp.HexString, 879 this=token.text, 880 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 881 ), 882 TokenType.NUMBER: lambda self, token: self.expression( 883 exp.Literal, this=token.text, is_string=False 884 ), 885 } 886 887 PRIMARY_PARSERS = { 888 **STRING_PARSERS, 889 **NUMERIC_PARSERS, 890 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 891 TokenType.NULL: lambda self, _: self.expression(exp.Null), 892 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 893 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 894 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 895 TokenType.STAR: lambda self, _: self._parse_star_ops(), 896 } 897 898 PLACEHOLDER_PARSERS = { 899 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 900 TokenType.PARAMETER: lambda self: self._parse_parameter(), 901 TokenType.COLON: lambda self: ( 902 self.expression(exp.Placeholder, this=self._prev.text) 903 if self._match_set(self.ID_VAR_TOKENS) 904 else None 905 ), 906 } 907 908 RANGE_PARSERS = { 909 TokenType.AT_GT: 
binary_range_parser(exp.ArrayContainsAll), 910 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 911 TokenType.GLOB: binary_range_parser(exp.Glob), 912 TokenType.ILIKE: binary_range_parser(exp.ILike), 913 TokenType.IN: lambda self, this: self._parse_in(this), 914 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 915 TokenType.IS: lambda self, this: self._parse_is(this), 916 TokenType.LIKE: binary_range_parser(exp.Like), 917 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 918 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 919 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 920 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 921 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 922 } 923 924 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 925 "ALLOWED_VALUES": lambda self: self.expression( 926 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 927 ), 928 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 929 "AUTO": lambda self: self._parse_auto_property(), 930 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 931 "BACKUP": lambda self: self.expression( 932 exp.BackupProperty, this=self._parse_var(any_token=True) 933 ), 934 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 935 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 936 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 937 "CHECKSUM": lambda self: self._parse_checksum(), 938 "CLUSTER BY": lambda self: self._parse_cluster(), 939 "CLUSTERED": lambda self: self._parse_clustered_by(), 940 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 941 exp.CollateProperty, **kwargs 942 ), 943 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 944 "CONTAINS": lambda self: self._parse_contains_property(), 945 "COPY": lambda 
self: self._parse_copy_property(), 946 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 947 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 948 "DEFINER": lambda self: self._parse_definer(), 949 "DETERMINISTIC": lambda self: self.expression( 950 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 951 ), 952 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 953 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 954 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 955 "DISTKEY": lambda self: self._parse_distkey(), 956 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 957 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 958 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 959 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 960 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 961 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 962 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 963 "FREESPACE": lambda self: self._parse_freespace(), 964 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 965 "HEAP": lambda self: self.expression(exp.HeapProperty), 966 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 967 "IMMUTABLE": lambda self: self.expression( 968 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 969 ), 970 "INHERITS": lambda self: self.expression( 971 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 972 ), 973 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 974 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 975 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 976 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 977 
"LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 978 "LIKE": lambda self: self._parse_create_like(), 979 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 980 "LOCK": lambda self: self._parse_locking(), 981 "LOCKING": lambda self: self._parse_locking(), 982 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 983 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 984 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 985 "MODIFIES": lambda self: self._parse_modifies_property(), 986 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 987 "NO": lambda self: self._parse_no_property(), 988 "ON": lambda self: self._parse_on_property(), 989 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 990 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 991 "PARTITION": lambda self: self._parse_partitioned_of(), 992 "PARTITION BY": lambda self: self._parse_partitioned_by(), 993 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 994 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 995 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 996 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 997 "READS": lambda self: self._parse_reads_property(), 998 "REMOTE": lambda self: self._parse_remote_with_connection(), 999 "RETURNS": lambda self: self._parse_returns(), 1000 "STRICT": lambda self: self.expression(exp.StrictProperty), 1001 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1002 "ROW": lambda self: self._parse_row(), 1003 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1004 "SAMPLE": lambda self: self.expression( 1005 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1006 ), 1007 "SECURE": lambda self: self.expression(exp.SecureProperty), 1008 "SECURITY": lambda self: 
self._parse_security(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Constraint keyword -> callable taking the parser instance and returning the
    # expression built for that constraint.
    CONSTRAINT_PARSERS = {
        # Both spellings share the same parser.
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        # The wrapped condition is parsed first, then an optional trailing ENFORCED keyword.
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        # IDENTITY reuses the auto-increment parser.
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        # ON UPDATE <function> yields an OnUpdateColumnConstraint; if UPDATE does not
        # follow, the fallback is an OnProperty wrapping the next identifier.
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        # `this` / `expression` are only parsed when FOR / the alias token are matched;
        # otherwise the corresponding arg receives the falsy match result.
        "WATERMARK": lambda self: self.expression(
            exp.WatermarkColumnConstraint,
            this=self._match(TokenType.FOR) and self._parse_column(),
            expression=self._match(TokenType.ALIAS) and self._parse_disjunction(),
        ),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

    # Keyword following ALTER <object> -> callable building the corresponding action.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        # The target table is only parsed when WITH is matched.
        "SWAP": lambda self: self.expression(
            exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
        ),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    # NOTE(review): presumably the constraint keywords that may appear in a schema
    # without a preceding CONSTRAINT <name> - confirm against the schema parser.
    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
        "WATERMARK",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    # Function name -> callable handling functions whose argument syntax needs a
    # dedicated parser.
    FUNCTION_PARSERS = {
        # CAST / CONVERT take their strictness from the STRICT_CAST class flag.
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "NORMALIZE": lambda self: self._parse_normalize(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "OVERLAY": lambda self: self._parse_overlay(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        # SAFE_CAST and TRY_CAST are parsed identically: non-strict, safe=True.
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
        "XMLELEMENT": lambda self: self.expression(
            exp.XMLElement,
            this=self._match_text_seq("NAME") and self._parse_id_var(),
            expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression),
        ),
        "XMLTABLE": lambda self: self._parse_xml_table(),
    }

    # Token type -> callable returning a (key, parsed node) pair.
    # NOTE(review): the key presumably names the arg the node is stored under on the
    # query - confirm against the query-modifier parser. Several tokens deliberately
    # share a key (LIMIT/FETCH, FOR/LOCK, TABLE_SAMPLE/USING, CONNECT_BY/START_WITH).
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    # Scope keyword following SET -> callable parsing the corresponding SET item.
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self:
self._parse_set_item_assignment("LOCAL"), 1218 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1219 "TRANSACTION": lambda self: self._parse_set_transaction(), 1220 } 1221 1222 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1223 1224 TYPE_LITERAL_PARSERS = { 1225 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1226 } 1227 1228 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1229 1230 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1231 1232 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1233 1234 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1235 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1236 "ISOLATION": ( 1237 ("LEVEL", "REPEATABLE", "READ"), 1238 ("LEVEL", "READ", "COMMITTED"), 1239 ("LEVEL", "READ", "UNCOMITTED"), 1240 ("LEVEL", "SERIALIZABLE"), 1241 ), 1242 "READ": ("WRITE", "ONLY"), 1243 } 1244 1245 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1246 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1247 ) 1248 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1249 1250 CREATE_SEQUENCE: OPTIONS_TYPE = { 1251 "SCALE": ("EXTEND", "NOEXTEND"), 1252 "SHARD": ("EXTEND", "NOEXTEND"), 1253 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1254 **dict.fromkeys( 1255 ( 1256 "SESSION", 1257 "GLOBAL", 1258 "KEEP", 1259 "NOKEEP", 1260 "ORDER", 1261 "NOORDER", 1262 "NOCACHE", 1263 "CYCLE", 1264 "NOCYCLE", 1265 "NOMINVALUE", 1266 "NOMAXVALUE", 1267 "NOSCALE", 1268 "NOSHARD", 1269 ), 1270 tuple(), 1271 ), 1272 } 1273 1274 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1275 1276 USABLES: OPTIONS_TYPE = dict.fromkeys( 1277 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1278 ) 1279 1280 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1281 1282 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1283 "TYPE": ("EVOLUTION",), 1284 
**dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1285 } 1286 1287 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1288 1289 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1290 1291 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1292 "NOT": ("ENFORCED",), 1293 "MATCH": ( 1294 "FULL", 1295 "PARTIAL", 1296 "SIMPLE", 1297 ), 1298 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1299 "USING": ( 1300 "BTREE", 1301 "HASH", 1302 ), 1303 **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()), 1304 } 1305 1306 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1307 1308 CLONE_KEYWORDS = {"CLONE", "COPY"} 1309 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1310 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1311 1312 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1313 1314 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1315 1316 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1317 1318 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1319 1320 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1321 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1322 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1323 1324 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1325 1326 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1327 1328 ADD_CONSTRAINT_TOKENS = { 1329 TokenType.CONSTRAINT, 1330 TokenType.FOREIGN_KEY, 1331 TokenType.INDEX, 1332 TokenType.KEY, 1333 TokenType.PRIMARY_KEY, 1334 TokenType.UNIQUE, 1335 } 1336 1337 DISTINCT_TOKENS = {TokenType.DISTINCT} 1338 1339 NULL_TOKENS = {TokenType.NULL} 1340 1341 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1342 1343 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1344 1345 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1346 1347 
# Keyword sets, keyword -> parser tables and class-level parser flags.
    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    # Literal prefix -> expression class built for it.
    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    # The style options for the ANALYZE statement
    ANALYZE_STYLES = {
        "BUFFER_USAGE_LIMIT",
        "FULL",
        "LOCAL",
        "NO_WRITE_TO_BINLOG",
        "SAMPLE",
        "SKIP_LOCKED",
        "VERBOSE",
    }

    # Keyword -> callable parsing that ANALYZE sub-expression. Several keywords share
    # a parser (ALL/PREDICATE, COMPUTE/ESTIMATE, DROP/UPDATE).
    ANALYZE_EXPRESSION_PARSERS = {
        "ALL": lambda self: self._parse_analyze_columns(),
        "COMPUTE": lambda self: self._parse_analyze_statistics(),
        "DELETE": lambda self: self._parse_analyze_delete(),
        "DROP": lambda self: self._parse_analyze_histogram(),
        "ESTIMATE": lambda self: self._parse_analyze_statistics(),
        "LIST": lambda self: self._parse_analyze_list(),
        "PREDICATE": lambda self: self._parse_analyze_columns(),
        "UPDATE": lambda self: self._parse_analyze_histogram(),
        "VALIDATE": lambda self: self._parse_analyze_validate(),
    }

    PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}

    AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

    # Empty by default.
    OPERATION_MODIFIERS: t.Set[str] = set()

    RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"}

    MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows)

    # Strictness flag handed to the cast/convert parsers for CAST and CONVERT.
    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g.
SELECT 1 FROM y.z AS z, z.a (Redshift) 1438 SUPPORTS_IMPLICIT_UNNEST = False 1439 1440 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1441 INTERVAL_SPANS = True 1442 1443 # Whether a PARTITION clause can follow a table reference 1444 SUPPORTS_PARTITION_SELECTION = False 1445 1446 # Whether the `name AS expr` schema/column constraint requires parentheses around `expr` 1447 WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True 1448 1449 # Whether the 'AS' keyword is optional in the CTE definition syntax 1450 OPTIONAL_ALIAS_TOKEN_CTE = True 1451 1452 __slots__ = ( 1453 "error_level", 1454 "error_message_context", 1455 "max_errors", 1456 "dialect", 1457 "sql", 1458 "errors", 1459 "_tokens", 1460 "_index", 1461 "_curr", 1462 "_next", 1463 "_prev", 1464 "_prev_comments", 1465 ) 1466 1467 # Autofilled 1468 SHOW_TRIE: t.Dict = {} 1469 SET_TRIE: t.Dict = {} 1470 1471 def __init__( 1472 self, 1473 error_level: t.Optional[ErrorLevel] = None, 1474 error_message_context: int = 100, 1475 max_errors: int = 3, 1476 dialect: DialectType = None, 1477 ): 1478 from sqlglot.dialects import Dialect 1479 1480 self.error_level = error_level or ErrorLevel.IMMEDIATE 1481 self.error_message_context = error_message_context 1482 self.max_errors = max_errors 1483 self.dialect = Dialect.get_or_raise(dialect) 1484 self.reset() 1485 1486 def reset(self): 1487 self.sql = "" 1488 self.errors = [] 1489 self._tokens = [] 1490 self._index = 0 1491 self._curr = None 1492 self._next = None 1493 self._prev = None 1494 self._prev_comments = None 1495 1496 def parse( 1497 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1498 ) -> t.List[t.Optional[exp.Expression]]: 1499 """ 1500 Parses a list of tokens and returns a list of syntax trees, one tree 1501 per parsed SQL statement. 1502 1503 Args: 1504 raw_tokens: The list of tokens. 1505 sql: The original SQL string, used to produce helpful debug messages. 1506 1507 Returns: 1508 The list of the produced syntax trees. 
1509 """ 1510 return self._parse( 1511 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1512 ) 1513 1514 def parse_into( 1515 self, 1516 expression_types: exp.IntoType, 1517 raw_tokens: t.List[Token], 1518 sql: t.Optional[str] = None, 1519 ) -> t.List[t.Optional[exp.Expression]]: 1520 """ 1521 Parses a list of tokens into a given Expression type. If a collection of Expression 1522 types is given instead, this method will try to parse the token list into each one 1523 of them, stopping at the first for which the parsing succeeds. 1524 1525 Args: 1526 expression_types: The expression type(s) to try and parse the token list into. 1527 raw_tokens: The list of tokens. 1528 sql: The original SQL string, used to produce helpful debug messages. 1529 1530 Returns: 1531 The target Expression. 1532 """ 1533 errors = [] 1534 for expression_type in ensure_list(expression_types): 1535 parser = self.EXPRESSION_PARSERS.get(expression_type) 1536 if not parser: 1537 raise TypeError(f"No parser registered for {expression_type}") 1538 1539 try: 1540 return self._parse(parser, raw_tokens, sql) 1541 except ParseError as e: 1542 e.errors[0]["into_expression"] = expression_type 1543 errors.append(e) 1544 1545 raise ParseError( 1546 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1547 errors=merge_errors(errors), 1548 ) from errors[-1] 1549 1550 def _parse( 1551 self, 1552 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1553 raw_tokens: t.List[Token], 1554 sql: t.Optional[str] = None, 1555 ) -> t.List[t.Optional[exp.Expression]]: 1556 self.reset() 1557 self.sql = sql or "" 1558 1559 total = len(raw_tokens) 1560 chunks: t.List[t.List[Token]] = [[]] 1561 1562 for i, token in enumerate(raw_tokens): 1563 if token.token_type == TokenType.SEMICOLON: 1564 if token.comments: 1565 chunks.append([token]) 1566 1567 if i < total - 1: 1568 chunks.append([]) 1569 else: 1570 chunks[-1].append(token) 1571 1572 expressions = [] 1573 1574 for 
tokens in chunks: 1575 self._index = -1 1576 self._tokens = tokens 1577 self._advance() 1578 1579 expressions.append(parse_method(self)) 1580 1581 if self._index < len(self._tokens): 1582 self.raise_error("Invalid expression / Unexpected token") 1583 1584 self.check_errors() 1585 1586 return expressions 1587 1588 def check_errors(self) -> None: 1589 """Logs or raises any found errors, depending on the chosen error level setting.""" 1590 if self.error_level == ErrorLevel.WARN: 1591 for error in self.errors: 1592 logger.error(str(error)) 1593 elif self.error_level == ErrorLevel.RAISE and self.errors: 1594 raise ParseError( 1595 concat_messages(self.errors, self.max_errors), 1596 errors=merge_errors(self.errors), 1597 ) 1598 1599 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1600 """ 1601 Appends an error in the list of recorded errors or raises it, depending on the chosen 1602 error level setting. 1603 """ 1604 token = token or self._curr or self._prev or Token.string("") 1605 start = token.start 1606 end = token.end + 1 1607 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1608 highlight = self.sql[start:end] 1609 end_context = self.sql[end : end + self.error_message_context] 1610 1611 error = ParseError.new( 1612 f"{message}. Line {token.line}, Col: {token.col}.\n" 1613 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1614 description=message, 1615 line=token.line, 1616 col=token.col, 1617 start_context=start_context, 1618 highlight=highlight, 1619 end_context=end_context, 1620 ) 1621 1622 if self.error_level == ErrorLevel.IMMEDIATE: 1623 raise error 1624 1625 self.errors.append(error) 1626 1627 def expression( 1628 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1629 ) -> E: 1630 """ 1631 Creates a new, validated Expression. 1632 1633 Args: 1634 exp_class: The expression class to instantiate. 1635 comments: An optional list of comments to attach to the expression. 
1636 kwargs: The arguments to set for the expression along with their respective values. 1637 1638 Returns: 1639 The target expression. 1640 """ 1641 instance = exp_class(**kwargs) 1642 instance.add_comments(comments) if comments else self._add_comments(instance) 1643 return self.validate_expression(instance) 1644 1645 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1646 if expression and self._prev_comments: 1647 expression.add_comments(self._prev_comments) 1648 self._prev_comments = None 1649 1650 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1651 """ 1652 Validates an Expression, making sure that all its mandatory arguments are set. 1653 1654 Args: 1655 expression: The expression to validate. 1656 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1657 1658 Returns: 1659 The validated expression. 1660 """ 1661 if self.error_level != ErrorLevel.IGNORE: 1662 for error_message in expression.error_messages(args): 1663 self.raise_error(error_message) 1664 1665 return expression 1666 1667 def _find_sql(self, start: Token, end: Token) -> str: 1668 return self.sql[start.start : end.end + 1] 1669 1670 def _is_connected(self) -> bool: 1671 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1672 1673 def _advance(self, times: int = 1) -> None: 1674 self._index += times 1675 self._curr = seq_get(self._tokens, self._index) 1676 self._next = seq_get(self._tokens, self._index + 1) 1677 1678 if self._index > 0: 1679 self._prev = self._tokens[self._index - 1] 1680 self._prev_comments = self._prev.comments 1681 else: 1682 self._prev = None 1683 self._prev_comments = None 1684 1685 def _retreat(self, index: int) -> None: 1686 if index != self._index: 1687 self._advance(index - self._index) 1688 1689 def _warn_unsupported(self) -> None: 1690 if len(self._tokens) <= 1: 1691 return 1692 1693 # We use _find_sql because self.sql may comprise multiple chunks, 
and we're only 1694 # interested in emitting a warning for the one being currently processed. 1695 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1696 1697 logger.warning( 1698 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1699 ) 1700 1701 def _parse_command(self) -> exp.Command: 1702 self._warn_unsupported() 1703 return self.expression( 1704 exp.Command, 1705 comments=self._prev_comments, 1706 this=self._prev.text.upper(), 1707 expression=self._parse_string(), 1708 ) 1709 1710 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1711 """ 1712 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 1713 This behavior can be different depending on the uset-set ErrorLevel, so _try_parse aims to 1714 solve this by setting & resetting the parser state accordingly 1715 """ 1716 index = self._index 1717 error_level = self.error_level 1718 1719 self.error_level = ErrorLevel.IMMEDIATE 1720 try: 1721 this = parse_method() 1722 except ParseError: 1723 this = None 1724 finally: 1725 if not this or retreat: 1726 self._retreat(index) 1727 self.error_level = error_level 1728 1729 return this 1730 1731 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1732 start = self._prev 1733 exists = self._parse_exists() if allow_exists else None 1734 1735 self._match(TokenType.ON) 1736 1737 materialized = self._match_text_seq("MATERIALIZED") 1738 kind = self._match_set(self.CREATABLES) and self._prev 1739 if not kind: 1740 return self._parse_as_command(start) 1741 1742 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1743 this = self._parse_user_defined_function(kind=kind.token_type) 1744 elif kind.token_type == TokenType.TABLE: 1745 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1746 elif kind.token_type == TokenType.COLUMN: 1747 this = self._parse_column() 1748 else: 1749 this 
= self._parse_id_var() 1750 1751 self._match(TokenType.IS) 1752 1753 return self.expression( 1754 exp.Comment, 1755 this=this, 1756 kind=kind.text, 1757 expression=self._parse_string(), 1758 exists=exists, 1759 materialized=materialized, 1760 ) 1761 1762 def _parse_to_table( 1763 self, 1764 ) -> exp.ToTableProperty: 1765 table = self._parse_table_parts(schema=True) 1766 return self.expression(exp.ToTableProperty, this=table) 1767 1768 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1769 def _parse_ttl(self) -> exp.Expression: 1770 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1771 this = self._parse_bitwise() 1772 1773 if self._match_text_seq("DELETE"): 1774 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1775 if self._match_text_seq("RECOMPRESS"): 1776 return self.expression( 1777 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1778 ) 1779 if self._match_text_seq("TO", "DISK"): 1780 return self.expression( 1781 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1782 ) 1783 if self._match_text_seq("TO", "VOLUME"): 1784 return self.expression( 1785 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1786 ) 1787 1788 return this 1789 1790 expressions = self._parse_csv(_parse_ttl_action) 1791 where = self._parse_where() 1792 group = self._parse_group() 1793 1794 aggregates = None 1795 if group and self._match(TokenType.SET): 1796 aggregates = self._parse_csv(self._parse_set_item) 1797 1798 return self.expression( 1799 exp.MergeTreeTTL, 1800 expressions=expressions, 1801 where=where, 1802 group=group, 1803 aggregates=aggregates, 1804 ) 1805 1806 def _parse_statement(self) -> t.Optional[exp.Expression]: 1807 if self._curr is None: 1808 return None 1809 1810 if self._match_set(self.STATEMENT_PARSERS): 1811 comments = self._prev_comments 1812 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 1813 stmt.add_comments(comments, 
prepend=True) 1814 return stmt 1815 1816 if self._match_set(self.dialect.tokenizer.COMMANDS): 1817 return self._parse_command() 1818 1819 expression = self._parse_expression() 1820 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1821 return self._parse_query_modifiers(expression) 1822 1823 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1824 start = self._prev 1825 temporary = self._match(TokenType.TEMPORARY) 1826 materialized = self._match_text_seq("MATERIALIZED") 1827 1828 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1829 if not kind: 1830 return self._parse_as_command(start) 1831 1832 concurrently = self._match_text_seq("CONCURRENTLY") 1833 if_exists = exists or self._parse_exists() 1834 1835 if kind == "COLUMN": 1836 this = self._parse_column() 1837 else: 1838 this = self._parse_table_parts( 1839 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1840 ) 1841 1842 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1843 1844 if self._match(TokenType.L_PAREN, advance=False): 1845 expressions = self._parse_wrapped_csv(self._parse_types) 1846 else: 1847 expressions = None 1848 1849 return self.expression( 1850 exp.Drop, 1851 exists=if_exists, 1852 this=this, 1853 expressions=expressions, 1854 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1855 temporary=temporary, 1856 materialized=materialized, 1857 cascade=self._match_text_seq("CASCADE"), 1858 constraints=self._match_text_seq("CONSTRAINTS"), 1859 purge=self._match_text_seq("PURGE"), 1860 cluster=cluster, 1861 concurrently=concurrently, 1862 ) 1863 1864 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1865 return ( 1866 self._match_text_seq("IF") 1867 and (not not_ or self._match(TokenType.NOT)) 1868 and self._match(TokenType.EXISTS) 1869 ) 1870 1871 def _parse_create(self) -> exp.Create | exp.Command: 1872 # Note: this can't be None because we've matched 
a statement parser 1873 start = self._prev 1874 1875 replace = ( 1876 start.token_type == TokenType.REPLACE 1877 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1878 or self._match_pair(TokenType.OR, TokenType.ALTER) 1879 ) 1880 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1881 1882 unique = self._match(TokenType.UNIQUE) 1883 1884 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1885 clustered = True 1886 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1887 "COLUMNSTORE" 1888 ): 1889 clustered = False 1890 else: 1891 clustered = None 1892 1893 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1894 self._advance() 1895 1896 properties = None 1897 create_token = self._match_set(self.CREATABLES) and self._prev 1898 1899 if not create_token: 1900 # exp.Properties.Location.POST_CREATE 1901 properties = self._parse_properties() 1902 create_token = self._match_set(self.CREATABLES) and self._prev 1903 1904 if not properties or not create_token: 1905 return self._parse_as_command(start) 1906 1907 concurrently = self._match_text_seq("CONCURRENTLY") 1908 exists = self._parse_exists(not_=True) 1909 this = None 1910 expression: t.Optional[exp.Expression] = None 1911 indexes = None 1912 no_schema_binding = None 1913 begin = None 1914 end = None 1915 clone = None 1916 1917 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1918 nonlocal properties 1919 if properties and temp_props: 1920 properties.expressions.extend(temp_props.expressions) 1921 elif temp_props: 1922 properties = temp_props 1923 1924 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1925 this = self._parse_user_defined_function(kind=create_token.token_type) 1926 1927 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1928 extend_props(self._parse_properties()) 1929 1930 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1931 
extend_props(self._parse_properties()) 1932 1933 if not expression: 1934 if self._match(TokenType.COMMAND): 1935 expression = self._parse_as_command(self._prev) 1936 else: 1937 begin = self._match(TokenType.BEGIN) 1938 return_ = self._match_text_seq("RETURN") 1939 1940 if self._match(TokenType.STRING, advance=False): 1941 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1942 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1943 expression = self._parse_string() 1944 extend_props(self._parse_properties()) 1945 else: 1946 expression = self._parse_user_defined_function_expression() 1947 1948 end = self._match_text_seq("END") 1949 1950 if return_: 1951 expression = self.expression(exp.Return, this=expression) 1952 elif create_token.token_type == TokenType.INDEX: 1953 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1954 if not self._match(TokenType.ON): 1955 index = self._parse_id_var() 1956 anonymous = False 1957 else: 1958 index = None 1959 anonymous = True 1960 1961 this = self._parse_index(index=index, anonymous=anonymous) 1962 elif create_token.token_type in self.DB_CREATABLES: 1963 table_parts = self._parse_table_parts( 1964 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1965 ) 1966 1967 # exp.Properties.Location.POST_NAME 1968 self._match(TokenType.COMMA) 1969 extend_props(self._parse_properties(before=True)) 1970 1971 this = self._parse_schema(this=table_parts) 1972 1973 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1974 extend_props(self._parse_properties()) 1975 1976 self._match(TokenType.ALIAS) 1977 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1978 # exp.Properties.Location.POST_ALIAS 1979 extend_props(self._parse_properties()) 1980 1981 if create_token.token_type == TokenType.SEQUENCE: 1982 expression = self._parse_types() 1983 extend_props(self._parse_properties()) 1984 else: 
def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
    """
    Parse a run of sequence options (INCREMENT, MINVALUE, MAXVALUE, START, CACHE,
    OWNED BY and the options listed in `self.CREATE_SEQUENCE`).

    Returns:
        The populated exp.SequenceProperties node, or None when no token was consumed.
    """
    start_index = self._index
    sequence = exp.SequenceProperties()
    extra_options = []

    while self._curr:
        # A comma between consecutive options is accepted but not required
        self._match(TokenType.COMMA)

        if self._match_text_seq("INCREMENT"):
            self._match_text_seq("BY")
            self._match_text_seq("=")
            sequence.set("increment", self._parse_term())
            continue

        if self._match_text_seq("MINVALUE"):
            sequence.set("minvalue", self._parse_term())
            continue

        if self._match_text_seq("MAXVALUE"):
            sequence.set("maxvalue", self._parse_term())
            continue

        if self._match(TokenType.START_WITH) or self._match_text_seq("START"):
            self._match_text_seq("=")
            sequence.set("start", self._parse_term())
            continue

        if self._match_text_seq("CACHE"):
            # T-SQL allows empty CACHE which is initialized dynamically
            sequence.set("cache", self._parse_number() or True)
            continue

        if self._match_text_seq("OWNED", "BY"):
            # "OWNED BY NONE" is the default
            if self._match_text_seq("NONE"):
                owner = None
            else:
                owner = self._parse_column()
            sequence.set("owned", owner)
            continue

        option = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
        if not option:
            break
        extra_options.append(option)

    sequence.set("options", extra_options or None)

    if self._index == start_index:
        return None
    return sequence
def _parse_property(self) -> t.Optional[exp.Expression]:
    """
    Parse a single property.

    Tried in order: a keyword registered in `self.PROPERTY_PARSERS`, the same preceded
    by DEFAULT, COMPOUND SORTKEY, SQL SECURITY, and finally a generic `key = value`
    pair. When no `=` follows the candidate key, the parser rewinds and falls back to
    parsing sequence properties.
    """
    if self._match_texts(self.PROPERTY_PARSERS):
        parse = self.PROPERTY_PARSERS[self._prev.text.upper()]
        return parse(self)

    if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
        parse = self.PROPERTY_PARSERS[self._prev.text.upper()]
        return parse(self, default=True)

    if self._match_text_seq("COMPOUND", "SORTKEY"):
        return self._parse_sortkey(compound=True)

    if self._match_text_seq("SQL", "SECURITY"):
        is_definer = self._match_text_seq("DEFINER")
        return self.expression(exp.SqlSecurityProperty, definer=is_definer)

    checkpoint = self._index
    key = self._parse_column()

    if not self._match(TokenType.EQ):
        self._retreat(checkpoint)
        return self._parse_sequence_properties()

    # A column key becomes exp.Dot when it has several dotted parts, exp.Var otherwise
    if isinstance(key, exp.Column):
        if len(key.parts) > 1:
            key = key.to_dot()
        else:
            key = exp.var(key.name)

    value = self._parse_bitwise()
    if not value:
        value = self._parse_var(any_token=True)

    # A value that was parsed as a column is reduced to a plain exp.Var
    if isinstance(value, exp.Column):
        value = exp.var(value.name)

    return self.expression(exp.Property, this=key, value=value)
def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
    """
    Parse consecutive properties until one fails to parse.

    Args:
        before: When truthy, each property is parsed with `_parse_property_before`
            instead of `_parse_property`.

    Returns:
        An exp.Properties node wrapping every parsed property, or None if there were none.
    """
    parse_one = self._parse_property_before if before else self._parse_property
    collected = []

    while True:
        parsed = parse_one()
        if not parsed:
            break
        # A single parse step may yield either one property or a list of them
        collected.extend(ensure_list(parsed))

    if not collected:
        return None

    return self.expression(exp.Properties, expressions=collected)
def _parse_system_versioning_property(
    self, with_: bool = False
) -> exp.WithSystemVersioningProperty:
    """
    Parse the value of a SYSTEM_VERSIONING property: `[=] OFF` or
    `[=] [ON] [(HISTORY_TABLE = ..., DATA_CONSISTENCY_CHECK = ...,
    HISTORY_RETENTION_PERIOD = ...)]`.

    Args:
        with_: Stored under the node's "with" argument.

    Returns:
        The exp.WithSystemVersioningProperty node; "on" is True unless OFF was matched.
    """
    self._match(TokenType.EQ)
    prop = self.expression(
        exp.WithSystemVersioningProperty,
        **{  # type: ignore
            "on": True,
            "with": with_,
        },
    )

    if self._match_text_seq("OFF"):
        prop.set("on", False)
        return prop

    self._match(TokenType.ON)
    if self._match(TokenType.L_PAREN):
        while self._curr and not self._match(TokenType.R_PAREN):
            index = self._index

            if self._match_text_seq("HISTORY_TABLE", "="):
                prop.set("this", self._parse_table_parts())
            elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
            elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                prop.set("retention_period", self._parse_retention_period())

            self._match(TokenType.COMMA)

            if self._index == index:
                # Nothing was consumed, i.e. the current token is not a known option.
                # Without this guard the loop would spin forever on the same token.
                self.raise_error("Unexpected token in SYSTEM_VERSIONING options")
                # raise_error is not relied upon to raise, so always leave the loop
                break

    return prop
def _parse_distributed_property(self) -> exp.DistributedByProperty:
    """
    Parse `[BY HASH (<ids>) | BY RANDOM] [BUCKETS {<number> | AUTO}] [<order>]`.

    The kind defaults to "HASH" when neither BY HASH nor BY RANDOM is present.
    """
    hash_columns: t.Optional[t.List[exp.Expression]] = None
    distribution = "HASH"

    if self._match_text_seq("BY", "HASH"):
        hash_columns = self._parse_wrapped_csv(self._parse_id_var)
    elif self._match_text_seq("BY", "RANDOM"):
        distribution = "RANDOM"

    # If the BUCKETS keyword is not present, the number of buckets is AUTO
    bucket_count: t.Optional[exp.Expression] = None
    if self._match_text_seq("BUCKETS"):
        if not self._match_text_seq("AUTO"):
            bucket_count = self._parse_number()

    ordering = self._parse_order()

    return self.expression(
        exp.DistributedByProperty,
        expressions=hash_columns,
        kind=distribution,
        buckets=bucket_count,
        order=ordering,
    )
def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
    """
    Parse a `[=] user@host` definer specification.

    See https://dev.mysql.com/doc/refman/8.0/en/create-view.html

    Returns:
        An exp.DefinerProperty whose value is the string "<user>@<host>", or None
        when either the user or the host part is missing.
    """
    self._match(TokenType.EQ)

    user = self._parse_id_var()
    self._match(TokenType.PARAMETER)

    host = self._parse_id_var()
    if not host:
        # The host may also be given as a bare MOD token, whose raw text is used
        host = self._match(TokenType.MOD) and self._prev.text

    if user and host:
        return exp.DefinerProperty(this=f"{user}@{host}")

    return None
def _parse_clustered_by(self) -> exp.ClusteredByProperty:
    """
    Parse `[BY] (<columns>) [SORTED BY (<ordered>)] [INTO] <number> [BUCKETS]`.
    """

    def parse_wrapped_list(parse_item: t.Callable) -> t.List:
        # Parses a comma-separated list enclosed in parentheses
        self._match_l_paren()
        items = self._parse_csv(parse_item)
        self._match_r_paren()
        return items

    self._match_text_seq("BY")
    columns = parse_wrapped_list(self._parse_column)

    ordering = None
    if self._match_text_seq("SORTED", "BY"):
        ordering = parse_wrapped_list(self._parse_ordered)

    self._match(TokenType.INTO)
    bucket_count = self._parse_number()
    self._match_text_seq("BUCKETS")

    return self.expression(
        exp.ClusteredByProperty,
        expressions=columns,
        sorted_by=ordering,
        buckets=bucket_count,
    )
def _parse_locking(self) -> exp.LockingProperty:
    """
    Parse `[TABLE | VIEW | ROW | DATABASE] [<name>] [FOR | IN] [<lock type>] [OVERRIDE]`.

    A name is only parsed for the DATABASE, TABLE and VIEW kinds. Every part is
    optional; parts that are absent are stored as None.
    """
    kind = None
    for token_type, label in (
        (TokenType.TABLE, "TABLE"),
        (TokenType.VIEW, "VIEW"),
        (TokenType.ROW, "ROW"),
    ):
        if self._match(token_type):
            kind = label
            break
    else:
        # DATABASE is matched by its text rather than by a token type
        if self._match_text_seq("DATABASE"):
            kind = "DATABASE"

    this = self._parse_table_parts() if kind in ("DATABASE", "TABLE", "VIEW") else None

    for_or_in = None
    for token_type, label in ((TokenType.FOR, "FOR"), (TokenType.IN, "IN")):
        if self._match(token_type):
            for_or_in = label
            break

    if self._match_text_seq("ACCESS"):
        lock_type = "ACCESS"
    elif self._match_texts(("EXCL", "EXCLUSIVE")):
        # Both spellings are normalized to the long form
        lock_type = "EXCLUSIVE"
    else:
        lock_type = None
        for keyword in ("SHARE", "READ", "WRITE", "CHECKSUM"):
            if self._match_text_seq(keyword):
                lock_type = keyword
                break

    override = self._match_text_seq("OVERRIDE")

    return self.expression(
        exp.LockingProperty,
        this=this,
        kind=kind,
        for_or_in=for_or_in,
        lock_type=lock_type,
        override=override,
    )
def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
    """
    Parse `OF <parent table> {DEFAULT | FOR VALUES <partition bound spec>}`.

    See https://www.postgresql.org/docs/current/sql-createtable.html

    Returns:
        An exp.PartitionedOfProperty, or None (after stepping back one token) when
        the next token is not OF.
    """
    if not self._match_text_seq("OF"):
        self._retreat(self._index - 1)
        return None

    this = self._parse_table(schema=True)

    # Bound up front so that the name is always defined, even on the error path below
    expression: t.Optional[t.Union[exp.Var, exp.PartitionBoundSpec]] = None

    if self._match(TokenType.DEFAULT):
        expression = exp.var("DEFAULT")
    elif self._match_text_seq("FOR", "VALUES"):
        expression = self._parse_partition_bound_spec()
    else:
        # NOTE(review): raise_error presumably records the error instead of raising for
        # some error levels; in that case `expression` stays None rather than being
        # an unbound local (which used to surface as an UnboundLocalError).
        self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

    return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)
def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
    """
    Parse `<table> [{INCLUDING | EXCLUDING} <option>]...`.

    Returns:
        An exp.LikeProperty holding the table and one exp.Property per
        INCLUDING/EXCLUDING option, or None when such a keyword is not followed by
        an identifier.
    """
    source_table = self._parse_table(schema=True)
    like_options = []

    while True:
        if not self._match_texts(("INCLUDING", "EXCLUDING")):
            break

        action = self._prev.text.upper()
        target = self._parse_id_var()
        if not target:
            return None

        value = exp.var(target.this.upper())
        like_options.append(self.expression(exp.Property, this=action, value=value))

    return self.expression(exp.LikeProperty, this=source_table, expressions=like_options)
def _parse_returns(self) -> exp.ReturnsProperty:
    """
    Parse what follows RETURNS: `TABLE <col types>` (angle-bracket form),
    `TABLE [<schema>]`, `NULL ON NULL INPUT`, or a plain data type.
    """
    value: t.Optional[exp.Expression]
    null = None
    is_table = self._match(TokenType.TABLE)

    if not is_table:
        if self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
            value = None
        else:
            value = self._parse_types()
    elif self._match(TokenType.LT):
        columns = self._parse_csv(self._parse_struct_types)
        value = self.expression(exp.Schema, this="TABLE", expressions=columns)
        if not self._match(TokenType.GT):
            self.raise_error("Expecting >")
    else:
        value = self._parse_schema(exp.var("TABLE"))

    return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)
def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]:
    """
    Parse the remainder of an INSERT statement.

    Returns:
        An exp.Insert node, or the result of `_parse_multitable_inserts` when the
        statement continues with FIRST or ALL.
    """
    comments = []
    hint = self._parse_hint()
    overwrite = self._match(TokenType.OVERWRITE)
    ignore = self._match(TokenType.IGNORE)
    local = self._match_text_seq("LOCAL")
    alternative = None
    is_function = None

    if self._match_text_seq("DIRECTORY"):
        # Target is a directory (e.g. INSERT OVERWRITE [LOCAL] DIRECTORY ...)
        this: t.Optional[exp.Expression] = self.expression(
            exp.Directory,
            this=self._parse_var_or_string(),
            local=local,
            row_format=self._parse_row_format(match_row=True),
        )
    else:
        if self._match_set((TokenType.FIRST, TokenType.ALL)):
            # INSERT FIRST / INSERT ALL is handled by the multi-table insert parser
            comments += ensure_list(self._prev_comments)
            return self._parse_multitable_inserts(comments)

        if self._match(TokenType.OR):
            # INSERT OR <alternative>; the alternative keeps the token's original text
            alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

        self._match(TokenType.INTO)
        comments += ensure_list(self._prev_comments)
        self._match(TokenType.TABLE)
        is_function = self._match(TokenType.FUNCTION)

        # The target is a function call when FUNCTION was matched, a table otherwise
        this = (
            self._parse_table(schema=True, parse_partition=True)
            if not is_function
            else self._parse_function()
        )
        if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False):
            this.set("alias", self._parse_table_alias())

    # RETURNING is accepted here, and again after the conflict clause (see below)
    returning = self._parse_returning()

    # NOTE: keyword arguments are evaluated top to bottom, so the order in which the
    # optional clauses are listed below is the order in which they are consumed
    return self.expression(
        exp.Insert,
        comments=comments,
        hint=hint,
        is_function=is_function,
        this=this,
        stored=self._match_text_seq("STORED") and self._parse_stored(),
        by_name=self._match_text_seq("BY", "NAME"),
        exists=self._parse_exists(),
        where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
        partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(),
        settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(),
        expression=self._parse_derived_table_values() or self._parse_ddl_select(),
        conflict=self._parse_on_conflict(),
        returning=returning or self._parse_returning(),
        overwrite=overwrite,
        alternative=alternative,
        ignore=ignore,
        source=self._match(TokenType.TABLE) and self._parse_table(),
    )
def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
    """
    Parse `[WITH] SERDEPROPERTIES (<properties>)`.

    Args:
        with_: Whether the WITH keyword was already consumed by the caller; when
            falsy, an optional WITH is matched here.

    Returns:
        An exp.SerdeProperties node, or None (with the position restored) when the
        SERDE_PROPERTIES token does not follow.
    """
    checkpoint = self._index
    has_with = with_ or self._match_text_seq("WITH")

    if self._match(TokenType.SERDE_PROPERTIES):
        serde_args = {
            "expressions": self._parse_wrapped_properties(),
            "with": has_with,
        }
        return self.expression(exp.SerdeProperties, **serde_args)  # type: ignore

    self._retreat(checkpoint)
    return None
def _parse_load(self) -> exp.LoadData | exp.Command:
    """Parse what follows a LOAD keyword.

    When the next token text is DATA, the statement is parsed into an `exp.LoadData`
    node. Otherwise the previously consumed token is handed to `_parse_as_command`.
    """
    if not self._match_text_seq("DATA"):
        return self._parse_as_command(self._prev)

    is_local = self._match_text_seq("LOCAL")
    self._match_text_seq("INPATH")  # consumed if present; the result is not needed
    source_path = self._parse_string()
    replace_existing = self._match(TokenType.OVERWRITE)
    self._match_pair(TokenType.INTO, TokenType.TABLE)

    # The remaining pieces are consumed in this order: table, partition, input format, serde.
    target = self._parse_table(schema=True)
    partition = self._parse_partition()
    input_format = self._match_text_seq("INPUTFORMAT") and self._parse_string()
    serde = self._match_text_seq("SERDE") and self._parse_string()

    return self.expression(
        exp.LoadData,
        this=target,
        local=is_local,
        overwrite=replace_existing,
        inpath=source_path,
        partition=partition,
        input_format=input_format,
        serde=serde,
    )
def _parse_cache(self) -> exp.Cache:
    """Build an `exp.Cache` node from the tokens at the current position.

    Consumes, in order: an optional LAZY keyword, an optional TABLE token, a table
    (parsed with `schema=True`), an optional `OPTIONS (<string> = <string>)` group,
    an optional ALIAS token and finally a nested select.
    """
    is_lazy = self._match_text_seq("LAZY")
    self._match(TokenType.TABLE)
    target = self._parse_table(schema=True)

    cache_options = []
    if self._match_text_seq("OPTIONS"):
        self._match_l_paren()
        option_key = self._parse_string()
        self._match(TokenType.EQ)
        option_value = self._parse_string()
        # Only a single key/value pair is read; it is stored as a flat two-item list.
        cache_options = [option_key, option_value]
        self._match_r_paren()

    self._match(TokenType.ALIAS)
    query = self._parse_select(nested=True)

    return self.expression(
        exp.Cache, this=target, lazy=is_lazy, options=cache_options, expression=query
    )
def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]:
    """Parse a single row of values into an `exp.Tuple`.

    A parenthesized, comma-separated list becomes a tuple of its items. Without a
    left parenthesis, one bare expression is wrapped in a one-item tuple.

    Args:
        values: not read by this implementation.

    Returns:
        The tuple, or None when there is no parenthesis and no expression is parsed.
    """

    def _row_item() -> t.Optional[exp.Expression]:
        # When the dialect allows it, a DEFAULT token is kept as an upper-cased var.
        if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT):
            return exp.var(self._prev.text.upper())
        return self._parse_expression()

    if not self._match(TokenType.L_PAREN):
        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        single = self._parse_expression()
        if not single:
            return None
        return self.expression(exp.Tuple, expressions=[single])

    items = self._parse_csv(_row_item)
    self._match_r_paren()
    return self.expression(exp.Tuple, expressions=items)
def _parse_select(
    self,
    nested: bool = False,
    table: bool = False,
    parse_subquery_alias: bool = True,
    parse_set_operation: bool = True,
) -> t.Optional[exp.Expression]:
    """Parse a query: an optional WITH prefix, then a SELECT or one of the other forms below.

    Args:
        nested: allow a parenthesized query at this position.
        table: also allows a parenthesized query; forwarded to `_parse_wrapped_select`.
        parse_subquery_alias: forwarded to `_parse_subquery` as `parse_alias` when a
            parenthesized query is parsed.
        parse_set_operation: when true, the result is passed through
            `_parse_set_operations` before being returned.

    Returns:
        The parsed expression. When no branch matches, `this` is None and is either
        returned as-is or handed to `_parse_set_operations`, depending on
        `parse_set_operation`.
    """
    cte = self._parse_with()

    if cte:
        # A WITH clause was parsed: the statement that follows must accept a "with" arg.
        this = self._parse_statement()

        if not this:
            self.raise_error("Failed to parse any statement following CTE")
            # Only reached if raise_error did not raise.
            return cte

        if "with" in this.arg_types:
            this.set("with", cte)
        else:
            self.raise_error(f"{this.key} does not support CTE")
            this = cte

        return this

    # duckdb supports leading with FROM x
    from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

    if self._match(TokenType.SELECT):
        comments = self._prev_comments

        hint = self._parse_hint()

        # ALL / DISTINCT are only matched when the token after the current one is not a dot.
        if self._next and not self._next.token_type == TokenType.DOT:
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)
        else:
            all_, distinct = None, None

        # `kind` is the upper-cased STRUCT / VALUE text when it follows an ALIAS token,
        # otherwise the falsy result of whichever match failed.
        kind = (
            self._match(TokenType.ALIAS)
            and self._match_texts(("STRUCT", "VALUE"))
            and self._prev.text.upper()
        )

        if distinct:
            distinct = self.expression(
                exp.Distinct,
                on=self._parse_value(values=False) if self._match(TokenType.ON) else None,
            )

        if all_ and distinct:
            self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

        operation_modifiers = []
        while self._curr and self._match_texts(self.OPERATION_MODIFIERS):
            operation_modifiers.append(exp.var(self._prev.text.upper()))

        # A leading limit (parsed with top=True) comes before the projections.
        limit = self._parse_limit(top=True)
        projections = self._parse_projections()

        this = self.expression(
            exp.Select,
            kind=kind,
            hint=hint,
            distinct=distinct,
            expressions=projections,
            limit=limit,
            operation_modifiers=operation_modifiers or None,
        )
        this.comments = comments

        into = self._parse_into()
        if into:
            this.set("into", into)

        # A FROM parsed before SELECT takes precedence over one parsed after it.
        if not from_:
            from_ = self._parse_from()

        if from_:
            this.set("from", from_)

        this = self._parse_query_modifiers(this)
    elif (table or nested) and self._match(TokenType.L_PAREN):
        this = self._parse_wrapped_select(table=table)

        # We return early here so that the UNION isn't attached to the subquery by the
        # following call to _parse_set_operations, but instead becomes the parent node
        self._match_r_paren()
        return self._parse_subquery(this, parse_alias=parse_subquery_alias)
    elif self._match(TokenType.VALUES, advance=False):
        this = self._parse_derived_table_values()
    elif from_:
        # Only a FROM clause was given: build `SELECT *` over its source.
        this = exp.select("*").from_(from_.this, copy=False)
    elif self._match(TokenType.SUMMARIZE):
        # Note: this rebinds the `table` parameter to the result of matching TABLE.
        table = self._match(TokenType.TABLE)
        this = self._parse_select() or self._parse_string() or self._parse_table()
        return self.expression(exp.Summarize, this=this, table=table)
    elif self._match(TokenType.DESCRIBE):
        this = self._parse_describe()
    elif self._match_text_seq("STREAM"):
        this = self._parse_function()
        if this:
            this = self.expression(exp.Stream, this=this)
        else:
            # No function followed: step back one token so STREAM is not consumed.
            self._retreat(self._index - 1)
    else:
        this = None

    return self._parse_set_operations(this) if parse_set_operation else this
def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
    """Parse a WITH clause and its list of CTEs into an `exp.With` node.

    Args:
        skip_with_token: when true, do not require the WITH token to be matched here.

    Returns:
        None when the WITH token is required but not matched, otherwise the node.
    """
    if not (skip_with_token or self._match(TokenType.WITH)):
        return None

    with_comments = self._prev_comments
    is_recursive = self._match(TokenType.RECURSIVE)

    ctes = []
    pending_comments = None
    while True:
        parsed = self._parse_cte()
        if isinstance(parsed, exp.CTE):
            ctes.append(parsed)
            if pending_comments:
                parsed.add_comments(pending_comments)

        # CTEs are separated by a comma or by a WITH token.
        separator = self._match(TokenType.COMMA) or self._match(TokenType.WITH)
        if not separator:
            break

        # Consume a WITH token that may follow the separator.
        self._match(TokenType.WITH)
        pending_comments = self._prev_comments

    search = self._parse_recursive_with_search()
    return self.expression(
        exp.With,
        comments=with_comments,
        expressions=ctes,
        recursive=is_recursive,
        search=search,
    )
def _parse_table_alias(
    self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
) -> t.Optional[exp.TableAlias]:
    """Parse an optional table alias, with an optional parenthesized column list.

    Args:
        alias_tokens: token types accepted as the alias name; falls back to
            `self.TABLE_ALIAS_TOKENS` when not given.

    Returns:
        An `exp.TableAlias`, or None when neither a name nor columns were parsed, or
        when `_can_parse_limit_or_offset` reports a LIMIT/OFFSET clause.
    """
    # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
    # so this section tries to parse the clause version and if it fails, it treats the token
    # as an identifier (alias)
    if self._can_parse_limit_or_offset():
        return None

    saw_alias_keyword = self._match(TokenType.ALIAS)
    allowed_tokens = alias_tokens or self.TABLE_ALIAS_TOKENS
    name = self._parse_id_var(any_token=saw_alias_keyword, tokens=allowed_tokens)
    if not name:
        name = self._parse_string_as_identifier()

    checkpoint = self._index
    column_names = None
    if self._match(TokenType.L_PAREN):
        column_names = self._parse_csv(self._parse_function_parameter)
        if column_names:
            self._match_r_paren()
        else:
            # Nothing parsed inside the parenthesis: give the "(" back.
            self._retreat(checkpoint)

    if not (name or column_names):
        return None

    result = self.expression(exp.TableAlias, this=name, columns=column_names)

    # We bubble up comments from the Identifier to the TableAlias
    if isinstance(name, exp.Identifier):
        result.add_comments(name.pop_comments())

    return result
def _parse_query_modifiers(
    self, this: t.Optional[exp.Expression]
) -> t.Optional[exp.Expression]:
    """Attach joins, laterals and the registered query modifiers to `this`.

    Modifiers are only parsed when `this` is an instance of `self.MODIFIABLES`.
    Afterwards, if `self.SUPPORTS_IMPLICIT_UNNEST` is set and `this` has a "from"
    arg, it is passed through `_implicit_unnests_to_explicit`.

    Returns:
        The (possibly replaced) expression, or the falsy input unchanged.
    """
    if isinstance(this, self.MODIFIABLES):
        for join in self._parse_joins():
            this.append("joins", join)
        for lateral in iter(self._parse_lateral, None):
            this.append("laterals", lateral)

        # Keep applying modifier parsers while the current token starts one and
        # the parser actually produced something.
        while self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
            modifier_parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
            key, expression = modifier_parser(self)
            if not expression:
                break

            this.set(key, expression)
            if key != "limit":
                continue

            # An offset parsed as part of the limit is moved to its own Offset node,
            # taking the limit's `expressions` along with it.
            embedded_offset = expression.args.pop("offset", None)
            if not embedded_offset:
                continue

            offset_node = exp.Offset(expression=embedded_offset)
            this.set("offset", offset_node)

            limit_by_expressions = expression.expressions
            expression.set("expressions", None)
            offset_node.set("expressions", limit_by_expressions)

    if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
        this = self._implicit_unnests_to_explicit(this)

    return this
def _parse_hint_body(self) -> t.Optional[exp.Hint]:
    """Parse the contents of a hint into an `exp.Hint`.

    Hints are read as repeated comma-separated lists of function calls or
    upper-cased vars. If a `ParseError` is raised, or tokens are left over
    afterwards, the parser retreats to where it started and returns the result of
    `_parse_hint_fallback_to_string` instead.
    """
    checkpoint = self._index
    parse_failed = False
    collected = []

    def _hint_item():
        return self._parse_hint_function_call() or self._parse_var(upper=True)

    try:
        while True:
            batch = self._parse_csv(_hint_item)
            # An empty list marks the end, exactly like an `iter(..., [])` sentinel.
            if batch == []:
                break
            collected.extend(batch)
    except ParseError:
        parse_failed = True

    if parse_failed or self._curr:
        self._retreat(checkpoint)
        return self._parse_hint_fallback_to_string()

    return self.expression(exp.Hint, expressions=collected)
def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
    """Parse a MATCH_RECOGNIZE clause into an `exp.MatchRecognize` node.

    Sub-clauses are read in a fixed order: partition by, order, MEASURES, the
    rows-per-match option, AFTER MATCH SKIP, PATTERN, DEFINE, the closing
    parenthesis and finally an optional table alias. Each sub-clause that is
    absent is stored as None.

    Returns:
        None when the MATCH_RECOGNIZE token is not matched, otherwise the node.
    """
    if not self._match(TokenType.MATCH_RECOGNIZE):
        return None

    self._match_l_paren()

    partition = self._parse_partition_by()
    order = self._parse_order()

    measures = (
        self._parse_csv(self._parse_match_recognize_measure)
        if self._match_text_seq("MEASURES")
        else None
    )

    # The rows-per-match option is kept as a single var holding the keyword text.
    if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
        rows = exp.var("ONE ROW PER MATCH")
    elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
        text = "ALL ROWS PER MATCH"
        if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
            text += " SHOW EMPTY MATCHES"
        elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
            text += " OMIT EMPTY MATCHES"
        elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
            text += " WITH UNMATCHED ROWS"
        rows = exp.var(text)
    else:
        rows = None

    # The skip option is likewise kept as a var of the keyword text.
    if self._match_text_seq("AFTER", "MATCH", "SKIP"):
        text = "AFTER MATCH SKIP"
        if self._match_text_seq("PAST", "LAST", "ROW"):
            text += " PAST LAST ROW"
        elif self._match_text_seq("TO", "NEXT", "ROW"):
            text += " TO NEXT ROW"
        elif self._match_text_seq("TO", "FIRST"):
            # The token after FIRST is taken verbatim, whatever it is.
            text += f" TO FIRST {self._advance_any().text}"  # type: ignore
        elif self._match_text_seq("TO", "LAST"):
            # The token after LAST is taken verbatim, whatever it is.
            text += f" TO LAST {self._advance_any().text}"  # type: ignore
        after = exp.var(text)
    else:
        after = None

    if self._match_text_seq("PATTERN"):
        self._match_l_paren()

        if not self._curr:
            self.raise_error("Expecting )", self._curr)

        # The pattern is not parsed: tokens are skipped until the parenthesis that
        # was just opened is balanced, and the covered SQL text is kept as a var.
        paren = 1
        start = self._curr

        while self._curr and paren > 0:
            if self._curr.token_type == TokenType.L_PAREN:
                paren += 1
            if self._curr.token_type == TokenType.R_PAREN:
                paren -= 1

            # `end` trails one token behind, so it stops before the closing parenthesis.
            end = self._prev
            self._advance()

        if paren > 0:
            self.raise_error("Expecting )", self._curr)

        pattern = exp.var(self._find_sql(start, end))
    else:
        pattern = None

    define = (
        self._parse_csv(self._parse_name_as_expression)
        if self._match_text_seq("DEFINE")
        else None
    )

    self._match_r_paren()

    return self.expression(
        exp.MatchRecognize,
        partition_by=partition,
        order=order,
        measures=measures,
        rows=rows,
        after=after,
        pattern=pattern,
        define=define,
        alias=self._parse_table_alias(),
    )
def _parse_using_identifiers(self) -> t.List[exp.Expression]:
    """Parse an optionally parenthesized, comma-separated list of column names.

    Each item is parsed as a column. When the result is an `exp.Column`, only its
    inner `this` is kept; anything else is returned unchanged.
    """

    def _identifier_of_column() -> t.Optional[exp.Expression]:
        parsed = self._parse_column()
        return parsed.this if isinstance(parsed, exp.Column) else parsed

    return self._parse_wrapped_csv(_identifier_of_column, optional=True)
def _parse_opclass(self) -> t.Optional[exp.Expression]:
    """Parse an assignment expression, optionally followed by an operator class.

    The parsed expression is returned as-is when the upcoming token is one of
    `self.OPCLASS_FOLLOW_KEYWORDS` or `self.OPTYPE_FOLLOW_TOKENS` (neither is
    consumed). Otherwise the following table parts are parsed and both are wrapped
    in an `exp.Opclass` node.
    """
    operand = self._parse_assignment()

    followed_by_keyword = self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False)
    if followed_by_keyword or self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
        return operand

    opclass_name = self._parse_table_parts()
    return self.expression(exp.Opclass, this=operand, expression=opclass_name)
def _parse_index(
    self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
) -> t.Optional[exp.Index]:
    """Parse an index definition into an `exp.Index` node.

    Args:
        index: an already parsed index name; when given, only the target table and
            the index parameters are parsed here.
        anonymous: treat the index as unnamed and parse it the same way as when
            `index` is given.

    Returns:
        The node, or None when no name was supplied and the INDEX token is not matched.
    """
    unique = primary = amp = table = None

    if index or anonymous:
        self._match(TokenType.ON)
        self._match(TokenType.TABLE)  # hive
        table = self._parse_table_parts(schema=True)
    else:
        unique = self._match(TokenType.UNIQUE)
        primary = self._match_text_seq("PRIMARY")
        amp = self._match_text_seq("AMP")

        if not self._match(TokenType.INDEX):
            return None

        index = self._parse_id_var()

    index_params = self._parse_index_params()

    return self.expression(
        exp.Index,
        this=index,
        table=table,
        unique=unique,
        primary=primary,
        amp=amp,
        params=index_params,
    )
def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
    """Parse one component of a table name.

    Candidates are tried in order and the first truthy result wins: a function
    call (skipped when `schema` is true), an identifier, a string read as an
    identifier, and finally a placeholder, whose result is returned whatever it is.
    """
    if not schema:
        as_function = self._parse_function(optional_parens=False)
        if as_function:
            return as_function

    as_identifier = self._parse_id_var(any_token=False)
    if as_identifier:
        return as_identifier

    as_string = self._parse_string_as_identifier()
    if as_string:
        return as_string

    return self._parse_placeholder()
def _parse_table(
    self,
    schema: bool = False,
    joins: bool = False,
    alias_tokens: t.Optional[t.Collection[TokenType]] = None,
    parse_bracket: bool = False,
    is_db_reference: bool = False,
    parse_partition: bool = False,
) -> t.Optional[exp.Expression]:
    """Parse a table-like source together with its trailing clauses.

    A lateral, an unnest, derived VALUES or a subquery are tried first and
    returned directly. Otherwise a table reference is parsed and decorated with
    whichever of partition, version, sample, alias, hints, pivots, joins and
    ordinality follow it.

    Args:
        schema: forwarded to `_parse_table_parts`; when true the result is handed
            to `_parse_schema` and returned before any alias or hint is parsed.
        joins: when true, trailing joins are parsed and appended to the table.
        alias_tokens: token types accepted as the alias; falls back to
            `self.TABLE_ALIAS_TOKENS`.
        parse_bracket: when true, try `_parse_bracket(None)` as the table source first.
        is_db_reference: forwarded to `_parse_table_parts`.
        parse_partition: allow a PARTITION clause even when
            `self.SUPPORTS_PARTITION_SELECTION` is false.

    Returns:
        The parsed expression.
    """
    lateral = self._parse_lateral()
    if lateral:
        return lateral

    unnest = self._parse_unnest()
    if unnest:
        return unnest

    values = self._parse_derived_table_values()
    if values:
        return values

    subquery = self._parse_select(table=True)
    if subquery:
        # Only parse pivots here if the subquery does not already carry some.
        if not subquery.args.get("pivots"):
            subquery.set("pivots", self._parse_pivots())
        return subquery

    bracket = parse_bracket and self._parse_bracket(None)
    bracket = self.expression(exp.Table, this=bracket) if bracket else None

    rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv(
        self._parse_table
    )
    rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None

    only = self._match(TokenType.ONLY)

    # First available source wins: bracket, then ROWS FROM, then a regular table name.
    this = t.cast(
        exp.Expression,
        bracket
        or rows_from
        or self._parse_bracket(
            self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
        ),
    )

    if only:
        this.set("only", only)

    # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
    self._match_text_seq("*")

    parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
    if parse_partition and self._match(TokenType.PARTITION, advance=False):
        this.set("partition", self._parse_partition())

    if schema:
        return self._parse_schema(this=this)

    version = self._parse_version()

    if version:
        this.set("version", version)

    # The sample is parsed either before or after the alias, depending on the dialect.
    if self.dialect.ALIAS_POST_TABLESAMPLE:
        this.set("sample", self._parse_table_sample())

    alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
    if alias:
        this.set("alias", alias)

    # `<table> AT <id>` is returned as an AtIndex over the table converted to a column.
    if isinstance(this, exp.Table) and self._match_text_seq("AT"):
        return self.expression(
            exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
        )

    this.set("hints", self._parse_table_hints())

    if not this.args.get("pivots"):
        this.set("pivots", self._parse_pivots())

    if not self.dialect.ALIAS_POST_TABLESAMPLE:
        this.set("sample", self._parse_table_sample())

    if joins:
        for join in self._parse_joins():
            this.append("joins", join)

    if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
        this.set("ordinality", True)
        # This overwrites any alias set above with the one parsed after WITH ORDINALITY.
        this.set("alias", self._parse_table_alias())

    return this
expression: t.Optional[exp.Expression] = self.expression( 3976 exp.Tuple, expressions=[start, end] 3977 ) 3978 elif self._match_text_seq("CONTAINED", "IN"): 3979 kind = "CONTAINED IN" 3980 expression = self.expression( 3981 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3982 ) 3983 elif self._match(TokenType.ALL): 3984 kind = "ALL" 3985 expression = None 3986 else: 3987 self._match_text_seq("AS", "OF") 3988 kind = "AS OF" 3989 expression = self._parse_type() 3990 3991 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3992 3993 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3994 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3995 index = self._index 3996 historical_data = None 3997 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3998 this = self._prev.text.upper() 3999 kind = ( 4000 self._match(TokenType.L_PAREN) 4001 and self._match_texts(self.HISTORICAL_DATA_KIND) 4002 and self._prev.text.upper() 4003 ) 4004 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4005 4006 if expression: 4007 self._match_r_paren() 4008 historical_data = self.expression( 4009 exp.HistoricalData, this=this, kind=kind, expression=expression 4010 ) 4011 else: 4012 self._retreat(index) 4013 4014 return historical_data 4015 4016 def _parse_changes(self) -> t.Optional[exp.Changes]: 4017 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4018 return None 4019 4020 information = self._parse_var(any_token=True) 4021 self._match_r_paren() 4022 4023 return self.expression( 4024 exp.Changes, 4025 information=information, 4026 at_before=self._parse_historical_data(), 4027 end=self._parse_historical_data(), 4028 ) 4029 4030 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4031 if not self._match(TokenType.UNNEST): 4032 return None 4033 4034 expressions = self._parse_wrapped_csv(self._parse_equality) 4035 offset = self._match_pair(TokenType.WITH, 
TokenType.ORDINALITY) 4036 4037 alias = self._parse_table_alias() if with_alias else None 4038 4039 if alias: 4040 if self.dialect.UNNEST_COLUMN_ONLY: 4041 if alias.args.get("columns"): 4042 self.raise_error("Unexpected extra column alias in unnest.") 4043 4044 alias.set("columns", [alias.this]) 4045 alias.set("this", None) 4046 4047 columns = alias.args.get("columns") or [] 4048 if offset and len(expressions) < len(columns): 4049 offset = columns.pop() 4050 4051 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4052 self._match(TokenType.ALIAS) 4053 offset = self._parse_id_var( 4054 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4055 ) or exp.to_identifier("offset") 4056 4057 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4058 4059 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4060 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4061 if not is_derived and not ( 4062 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4063 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4064 ): 4065 return None 4066 4067 expressions = self._parse_csv(self._parse_value) 4068 alias = self._parse_table_alias() 4069 4070 if is_derived: 4071 self._match_r_paren() 4072 4073 return self.expression( 4074 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4075 ) 4076 4077 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4078 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4079 as_modifier and self._match_text_seq("USING", "SAMPLE") 4080 ): 4081 return None 4082 4083 bucket_numerator = None 4084 bucket_denominator = None 4085 bucket_field = None 4086 percent = None 4087 size = None 4088 seed = None 4089 4090 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4091 matched_l_paren = self._match(TokenType.L_PAREN) 4092 4093 if self.TABLESAMPLE_CSV: 4094 num = None 4095 
expressions = self._parse_csv(self._parse_primary) 4096 else: 4097 expressions = None 4098 num = ( 4099 self._parse_factor() 4100 if self._match(TokenType.NUMBER, advance=False) 4101 else self._parse_primary() or self._parse_placeholder() 4102 ) 4103 4104 if self._match_text_seq("BUCKET"): 4105 bucket_numerator = self._parse_number() 4106 self._match_text_seq("OUT", "OF") 4107 bucket_denominator = bucket_denominator = self._parse_number() 4108 self._match(TokenType.ON) 4109 bucket_field = self._parse_field() 4110 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4111 percent = num 4112 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4113 size = num 4114 else: 4115 percent = num 4116 4117 if matched_l_paren: 4118 self._match_r_paren() 4119 4120 if self._match(TokenType.L_PAREN): 4121 method = self._parse_var(upper=True) 4122 seed = self._match(TokenType.COMMA) and self._parse_number() 4123 self._match_r_paren() 4124 elif self._match_texts(("SEED", "REPEATABLE")): 4125 seed = self._parse_wrapped(self._parse_number) 4126 4127 if not method and self.DEFAULT_SAMPLING_METHOD: 4128 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4129 4130 return self.expression( 4131 exp.TableSample, 4132 expressions=expressions, 4133 method=method, 4134 bucket_numerator=bucket_numerator, 4135 bucket_denominator=bucket_denominator, 4136 bucket_field=bucket_field, 4137 percent=percent, 4138 size=size, 4139 seed=seed, 4140 ) 4141 4142 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4143 return list(iter(self._parse_pivot, None)) or None 4144 4145 def _parse_joins(self) -> t.Iterator[exp.Join]: 4146 return iter(self._parse_join, None) 4147 4148 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4149 if not self._match(TokenType.INTO): 4150 return None 4151 4152 return self.expression( 4153 exp.UnpivotColumns, 4154 this=self._match_text_seq("NAME") and self._parse_column(), 4155 expressions=self._match_text_seq("VALUE") and 
self._parse_csv(self._parse_column), 4156 ) 4157 4158 # https://duckdb.org/docs/sql/statements/pivot 4159 def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4160 def _parse_on() -> t.Optional[exp.Expression]: 4161 this = self._parse_bitwise() 4162 4163 if self._match(TokenType.IN): 4164 # PIVOT ... ON col IN (row_val1, row_val2) 4165 return self._parse_in(this) 4166 if self._match(TokenType.ALIAS, advance=False): 4167 # UNPIVOT ... ON (col1, col2, col3) AS row_val 4168 return self._parse_alias(this) 4169 4170 return this 4171 4172 this = self._parse_table() 4173 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4174 into = self._parse_unpivot_columns() 4175 using = self._match(TokenType.USING) and self._parse_csv( 4176 lambda: self._parse_alias(self._parse_function()) 4177 ) 4178 group = self._parse_group() 4179 4180 return self.expression( 4181 exp.Pivot, 4182 this=this, 4183 expressions=expressions, 4184 using=using, 4185 group=group, 4186 unpivot=is_unpivot, 4187 into=into, 4188 ) 4189 4190 def _parse_pivot_in(self) -> exp.In: 4191 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4192 this = self._parse_select_or_expression() 4193 4194 self._match(TokenType.ALIAS) 4195 alias = self._parse_bitwise() 4196 if alias: 4197 if isinstance(alias, exp.Column) and not alias.db: 4198 alias = alias.this 4199 return self.expression(exp.PivotAlias, this=this, alias=alias) 4200 4201 return this 4202 4203 value = self._parse_column() 4204 4205 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4206 self.raise_error("Expecting IN (") 4207 4208 if self._match(TokenType.ANY): 4209 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4210 else: 4211 exprs = self._parse_csv(_parse_aliased_expression) 4212 4213 self._match_r_paren() 4214 return self.expression(exp.In, this=value, expressions=exprs) 4215 4216 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4217 index = 
self._index 4218 include_nulls = None 4219 4220 if self._match(TokenType.PIVOT): 4221 unpivot = False 4222 elif self._match(TokenType.UNPIVOT): 4223 unpivot = True 4224 4225 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4226 if self._match_text_seq("INCLUDE", "NULLS"): 4227 include_nulls = True 4228 elif self._match_text_seq("EXCLUDE", "NULLS"): 4229 include_nulls = False 4230 else: 4231 return None 4232 4233 expressions = [] 4234 4235 if not self._match(TokenType.L_PAREN): 4236 self._retreat(index) 4237 return None 4238 4239 if unpivot: 4240 expressions = self._parse_csv(self._parse_column) 4241 else: 4242 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 4243 4244 if not expressions: 4245 self.raise_error("Failed to parse PIVOT's aggregation list") 4246 4247 if not self._match(TokenType.FOR): 4248 self.raise_error("Expecting FOR") 4249 4250 fields = [] 4251 while True: 4252 field = self._try_parse(self._parse_pivot_in) 4253 if not field: 4254 break 4255 fields.append(field) 4256 4257 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4258 self._parse_bitwise 4259 ) 4260 4261 group = self._parse_group() 4262 4263 self._match_r_paren() 4264 4265 pivot = self.expression( 4266 exp.Pivot, 4267 expressions=expressions, 4268 fields=fields, 4269 unpivot=unpivot, 4270 include_nulls=include_nulls, 4271 default_on_null=default_on_null, 4272 group=group, 4273 ) 4274 4275 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4276 pivot.set("alias", self._parse_table_alias()) 4277 4278 if not unpivot: 4279 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4280 4281 columns: t.List[exp.Expression] = [] 4282 all_fields = [] 4283 for pivot_field in pivot.fields: 4284 pivot_field_expressions = pivot_field.expressions 4285 4286 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in 
this case. 4287 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4288 continue 4289 4290 all_fields.append( 4291 [ 4292 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4293 for fld in pivot_field_expressions 4294 ] 4295 ) 4296 4297 if all_fields: 4298 if names: 4299 all_fields.append(names) 4300 4301 # Generate all possible combinations of the pivot columns 4302 # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 4303 # generates the product between [[2000, 2010], ['NL', 'US'], ['total']] 4304 for fld_parts_tuple in itertools.product(*all_fields): 4305 fld_parts = list(fld_parts_tuple) 4306 4307 if names and self.PREFIXED_PIVOT_COLUMNS: 4308 # Move the "name" to the front of the list 4309 fld_parts.insert(0, fld_parts.pop(-1)) 4310 4311 columns.append(exp.to_identifier("_".join(fld_parts))) 4312 4313 pivot.set("columns", columns) 4314 4315 return pivot 4316 4317 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4318 return [agg.alias for agg in aggregations if agg.alias] 4319 4320 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4321 if not skip_where_token and not self._match(TokenType.PREWHERE): 4322 return None 4323 4324 return self.expression( 4325 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4326 ) 4327 4328 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4329 if not skip_where_token and not self._match(TokenType.WHERE): 4330 return None 4331 4332 return self.expression( 4333 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4334 ) 4335 4336 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4337 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4338 return None 4339 4340 elements: t.Dict[str, t.Any] = defaultdict(list) 4341 4342 if self._match(TokenType.ALL): 4343 elements["all"] = True 4344 
elif self._match(TokenType.DISTINCT): 4345 elements["all"] = False 4346 4347 while True: 4348 index = self._index 4349 4350 elements["expressions"].extend( 4351 self._parse_csv( 4352 lambda: None 4353 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4354 else self._parse_assignment() 4355 ) 4356 ) 4357 4358 before_with_index = self._index 4359 with_prefix = self._match(TokenType.WITH) 4360 4361 if self._match(TokenType.ROLLUP): 4362 elements["rollup"].append( 4363 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4364 ) 4365 elif self._match(TokenType.CUBE): 4366 elements["cube"].append( 4367 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4368 ) 4369 elif self._match(TokenType.GROUPING_SETS): 4370 elements["grouping_sets"].append( 4371 self.expression( 4372 exp.GroupingSets, 4373 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4374 ) 4375 ) 4376 elif self._match_text_seq("TOTALS"): 4377 elements["totals"] = True # type: ignore 4378 4379 if before_with_index <= self._index <= before_with_index + 1: 4380 self._retreat(before_with_index) 4381 break 4382 4383 if index == self._index: 4384 break 4385 4386 return self.expression(exp.Group, **elements) # type: ignore 4387 4388 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4389 return self.expression( 4390 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4391 ) 4392 4393 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4394 if self._match(TokenType.L_PAREN): 4395 grouping_set = self._parse_csv(self._parse_column) 4396 self._match_r_paren() 4397 return self.expression(exp.Tuple, expressions=grouping_set) 4398 4399 return self._parse_column() 4400 4401 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4402 if not skip_having_token and not self._match(TokenType.HAVING): 4403 return None 4404 return self.expression(exp.Having, 
this=self._parse_assignment()) 4405 4406 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4407 if not self._match(TokenType.QUALIFY): 4408 return None 4409 return self.expression(exp.Qualify, this=self._parse_assignment()) 4410 4411 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4412 if skip_start_token: 4413 start = None 4414 elif self._match(TokenType.START_WITH): 4415 start = self._parse_assignment() 4416 else: 4417 return None 4418 4419 self._match(TokenType.CONNECT_BY) 4420 nocycle = self._match_text_seq("NOCYCLE") 4421 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4422 exp.Prior, this=self._parse_bitwise() 4423 ) 4424 connect = self._parse_assignment() 4425 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4426 4427 if not start and self._match(TokenType.START_WITH): 4428 start = self._parse_assignment() 4429 4430 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4431 4432 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4433 this = self._parse_id_var(any_token=True) 4434 if self._match(TokenType.ALIAS): 4435 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4436 return this 4437 4438 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4439 if self._match_text_seq("INTERPOLATE"): 4440 return self._parse_wrapped_csv(self._parse_name_as_expression) 4441 return None 4442 4443 def _parse_order( 4444 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4445 ) -> t.Optional[exp.Expression]: 4446 siblings = None 4447 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4448 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4449 return this 4450 4451 siblings = True 4452 4453 return self.expression( 4454 exp.Order, 4455 this=this, 4456 expressions=self._parse_csv(self._parse_ordered), 4457 siblings=siblings, 4458 ) 4459 4460 def _parse_sort(self, exp_class: t.Type[E], token: 
TokenType) -> t.Optional[E]: 4461 if not self._match(token): 4462 return None 4463 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4464 4465 def _parse_ordered( 4466 self, parse_method: t.Optional[t.Callable] = None 4467 ) -> t.Optional[exp.Ordered]: 4468 this = parse_method() if parse_method else self._parse_assignment() 4469 if not this: 4470 return None 4471 4472 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4473 this = exp.var("ALL") 4474 4475 asc = self._match(TokenType.ASC) 4476 desc = self._match(TokenType.DESC) or (asc and False) 4477 4478 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4479 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4480 4481 nulls_first = is_nulls_first or False 4482 explicitly_null_ordered = is_nulls_first or is_nulls_last 4483 4484 if ( 4485 not explicitly_null_ordered 4486 and ( 4487 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4488 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4489 ) 4490 and self.dialect.NULL_ORDERING != "nulls_are_last" 4491 ): 4492 nulls_first = True 4493 4494 if self._match_text_seq("WITH", "FILL"): 4495 with_fill = self.expression( 4496 exp.WithFill, 4497 **{ # type: ignore 4498 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4499 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4500 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4501 "interpolate": self._parse_interpolate(), 4502 }, 4503 ) 4504 else: 4505 with_fill = None 4506 4507 return self.expression( 4508 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4509 ) 4510 4511 def _parse_limit_options(self) -> exp.LimitOptions: 4512 percent = self._match(TokenType.PERCENT) 4513 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4514 self._match_text_seq("ONLY") 4515 with_ties = self._match_text_seq("WITH", "TIES") 4516 return self.expression(exp.LimitOptions, percent=percent, 
rows=rows, with_ties=with_ties) 4517 4518 def _parse_limit( 4519 self, 4520 this: t.Optional[exp.Expression] = None, 4521 top: bool = False, 4522 skip_limit_token: bool = False, 4523 ) -> t.Optional[exp.Expression]: 4524 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4525 comments = self._prev_comments 4526 if top: 4527 limit_paren = self._match(TokenType.L_PAREN) 4528 expression = self._parse_term() if limit_paren else self._parse_number() 4529 4530 if limit_paren: 4531 self._match_r_paren() 4532 4533 limit_options = self._parse_limit_options() 4534 else: 4535 limit_options = None 4536 expression = self._parse_term() 4537 4538 if self._match(TokenType.COMMA): 4539 offset = expression 4540 expression = self._parse_term() 4541 else: 4542 offset = None 4543 4544 limit_exp = self.expression( 4545 exp.Limit, 4546 this=this, 4547 expression=expression, 4548 offset=offset, 4549 comments=comments, 4550 limit_options=limit_options, 4551 expressions=self._parse_limit_by(), 4552 ) 4553 4554 return limit_exp 4555 4556 if self._match(TokenType.FETCH): 4557 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4558 direction = self._prev.text.upper() if direction else "FIRST" 4559 4560 count = self._parse_field(tokens=self.FETCH_TOKENS) 4561 4562 return self.expression( 4563 exp.Fetch, 4564 direction=direction, 4565 count=count, 4566 limit_options=self._parse_limit_options(), 4567 ) 4568 4569 return this 4570 4571 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4572 if not self._match(TokenType.OFFSET): 4573 return this 4574 4575 count = self._parse_term() 4576 self._match_set((TokenType.ROW, TokenType.ROWS)) 4577 4578 return self.expression( 4579 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4580 ) 4581 4582 def _can_parse_limit_or_offset(self) -> bool: 4583 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4584 return False 4585 4586 index = 
self._index 4587 result = bool( 4588 self._try_parse(self._parse_limit, retreat=True) 4589 or self._try_parse(self._parse_offset, retreat=True) 4590 ) 4591 self._retreat(index) 4592 return result 4593 4594 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4595 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4596 4597 def _parse_locks(self) -> t.List[exp.Lock]: 4598 locks = [] 4599 while True: 4600 if self._match_text_seq("FOR", "UPDATE"): 4601 update = True 4602 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4603 "LOCK", "IN", "SHARE", "MODE" 4604 ): 4605 update = False 4606 else: 4607 break 4608 4609 expressions = None 4610 if self._match_text_seq("OF"): 4611 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4612 4613 wait: t.Optional[bool | exp.Expression] = None 4614 if self._match_text_seq("NOWAIT"): 4615 wait = True 4616 elif self._match_text_seq("WAIT"): 4617 wait = self._parse_primary() 4618 elif self._match_text_seq("SKIP", "LOCKED"): 4619 wait = False 4620 4621 locks.append( 4622 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4623 ) 4624 4625 return locks 4626 4627 def parse_set_operation(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4628 start = self._index 4629 _, side_token, kind_token = self._parse_join_parts() 4630 4631 side = side_token.text if side_token else None 4632 kind = kind_token.text if kind_token else None 4633 4634 if not self._match_set(self.SET_OPERATIONS): 4635 self._retreat(start) 4636 return None 4637 4638 token_type = self._prev.token_type 4639 4640 if token_type == TokenType.UNION: 4641 operation: t.Type[exp.SetOperation] = exp.Union 4642 elif token_type == TokenType.EXCEPT: 4643 operation = exp.Except 4644 else: 4645 operation = exp.Intersect 4646 4647 comments = self._prev.comments 4648 4649 if self._match(TokenType.DISTINCT): 4650 distinct: t.Optional[bool] = True 4651 elif 
self._match(TokenType.ALL): 4652 distinct = False 4653 else: 4654 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4655 if distinct is None: 4656 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4657 4658 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4659 "STRICT", "CORRESPONDING" 4660 ) 4661 if self._match_text_seq("CORRESPONDING"): 4662 by_name = True 4663 if not side and not kind: 4664 kind = "INNER" 4665 4666 on_column_list = None 4667 if by_name and self._match_texts(("ON", "BY")): 4668 on_column_list = self._parse_wrapped_csv(self._parse_column) 4669 4670 expression = self._parse_select(nested=True, parse_set_operation=False) 4671 4672 return self.expression( 4673 operation, 4674 comments=comments, 4675 this=this, 4676 distinct=distinct, 4677 by_name=by_name, 4678 expression=expression, 4679 side=side, 4680 kind=kind, 4681 on=on_column_list, 4682 ) 4683 4684 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4685 while True: 4686 setop = self.parse_set_operation(this) 4687 if not setop: 4688 break 4689 this = setop 4690 4691 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4692 expression = this.expression 4693 4694 if expression: 4695 for arg in self.SET_OP_MODIFIERS: 4696 expr = expression.args.get(arg) 4697 if expr: 4698 this.set(arg, expr.pop()) 4699 4700 return this 4701 4702 def _parse_expression(self) -> t.Optional[exp.Expression]: 4703 return self._parse_alias(self._parse_assignment()) 4704 4705 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4706 this = self._parse_disjunction() 4707 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4708 # This allows us to parse <non-identifier token> := <expr> 4709 this = exp.column( 4710 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4711 ) 4712 4713 while self._match_set(self.ASSIGNMENT): 4714 if isinstance(this, 
exp.Column) and len(this.parts) == 1: 4715 this = this.this 4716 4717 this = self.expression( 4718 self.ASSIGNMENT[self._prev.token_type], 4719 this=this, 4720 comments=self._prev_comments, 4721 expression=self._parse_assignment(), 4722 ) 4723 4724 return this 4725 4726 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4727 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4728 4729 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4730 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4731 4732 def _parse_equality(self) -> t.Optional[exp.Expression]: 4733 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4734 4735 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4736 return self._parse_tokens(self._parse_range, self.COMPARISON) 4737 4738 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4739 this = this or self._parse_bitwise() 4740 negate = self._match(TokenType.NOT) 4741 4742 if self._match_set(self.RANGE_PARSERS): 4743 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4744 if not expression: 4745 return this 4746 4747 this = expression 4748 elif self._match(TokenType.ISNULL): 4749 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4750 4751 # Postgres supports ISNULL and NOTNULL for conditions. 
4752 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4753 if self._match(TokenType.NOTNULL): 4754 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4755 this = self.expression(exp.Not, this=this) 4756 4757 if negate: 4758 this = self._negate_range(this) 4759 4760 if self._match(TokenType.IS): 4761 this = self._parse_is(this) 4762 4763 return this 4764 4765 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4766 if not this: 4767 return this 4768 4769 return self.expression(exp.Not, this=this) 4770 4771 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4772 index = self._index - 1 4773 negate = self._match(TokenType.NOT) 4774 4775 if self._match_text_seq("DISTINCT", "FROM"): 4776 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4777 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4778 4779 if self._match(TokenType.JSON): 4780 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4781 4782 if self._match_text_seq("WITH"): 4783 _with = True 4784 elif self._match_text_seq("WITHOUT"): 4785 _with = False 4786 else: 4787 _with = None 4788 4789 unique = self._match(TokenType.UNIQUE) 4790 self._match_text_seq("KEYS") 4791 expression: t.Optional[exp.Expression] = self.expression( 4792 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4793 ) 4794 else: 4795 expression = self._parse_primary() or self._parse_null() 4796 if not expression: 4797 self._retreat(index) 4798 return None 4799 4800 this = self.expression(exp.Is, this=this, expression=expression) 4801 return self.expression(exp.Not, this=this) if negate else this 4802 4803 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4804 unnest = self._parse_unnest(with_alias=False) 4805 if unnest: 4806 this = self.expression(exp.In, this=this, unnest=unnest) 4807 elif self._match_set((TokenType.L_PAREN, 
TokenType.L_BRACKET)): 4808 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4809 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4810 4811 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4812 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4813 else: 4814 this = self.expression(exp.In, this=this, expressions=expressions) 4815 4816 if matched_l_paren: 4817 self._match_r_paren(this) 4818 elif not self._match(TokenType.R_BRACKET, expression=this): 4819 self.raise_error("Expecting ]") 4820 else: 4821 this = self.expression(exp.In, this=this, field=self._parse_column()) 4822 4823 return this 4824 4825 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4826 low = self._parse_bitwise() 4827 self._match(TokenType.AND) 4828 high = self._parse_bitwise() 4829 return self.expression(exp.Between, this=this, low=low, high=high) 4830 4831 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4832 if not self._match(TokenType.ESCAPE): 4833 return this 4834 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4835 4836 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4837 index = self._index 4838 4839 if not self._match(TokenType.INTERVAL) and match_interval: 4840 return None 4841 4842 if self._match(TokenType.STRING, advance=False): 4843 this = self._parse_primary() 4844 else: 4845 this = self._parse_term() 4846 4847 if not this or ( 4848 isinstance(this, exp.Column) 4849 and not this.table 4850 and not this.this.quoted 4851 and this.name.upper() == "IS" 4852 ): 4853 self._retreat(index) 4854 return None 4855 4856 unit = self._parse_function() or ( 4857 not self._match(TokenType.ALIAS, advance=False) 4858 and self._parse_var(any_token=True, upper=True) 4859 ) 4860 4861 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 
# each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.to_py())
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if parts and unit:
                # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                unit = None
                self._retreat(self._index - 1)

            if len(parts) == 1:
                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var, this=parts[0][1].upper())
        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        interval = self.expression(exp.Interval, this=this, unit=unit)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse a left-associative chain of bitwise-level operators over terms.

        Besides the operators registered in `self.BITWISE`, this handles the DPIPE token as
        string concatenation (only when the dialect enables it), the DQMARK token as an
        `exp.Coalesce`, and the `LT LT` / `GT GT` token pairs as left / right bit shifts.

        Returns:
            The combined expression, or whatever `_parse_term` returned if no operator follows.
        """
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(
                    exp.Coalesce, this=this, expressions=ensure_list(self._parse_term())
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse factors joined, left to right, by the operators registered in `self.TERM`.

        Comments attached to the operator token are carried over to the built node. When the
        built node is an `exp.Collate` whose right side is a single-part column, that column is
        replaced by its identifier (if quoted) or by an `exp.Var` of the same name.
        """
        this = self._parse_factor()

        while self._match_set(self.TERM):
            klass = self.TERM[self._prev.token_type]
            comments = self._prev_comments
            expression = self._parse_factor()

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Collate):
                expr = this.expression

                # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise
                # fallback to Identifier / Var
                if isinstance(expr, exp.Column) and len(expr.parts) == 1:
                    ident = expr.this
                    if isinstance(ident, exp.Identifier):
                        this.set("expression", ident if ident.quoted else exp.var(ident.name))

        return this

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse operands joined, left to right, by the operators registered in `self.FACTOR`.

        Operands are parsed with `_parse_exponent` when `self.EXPONENT` is non-empty, otherwise
        with `_parse_unary`. `exp.Div` nodes get their `typed` and `safe` args from the dialect.
        """
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            # An alphabetic integer-division operator token with no right operand is not treated
            # as an operator: un-consume it and return what was parsed so far
            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Div):
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        """Parse unary expressions joined by `self.EXPONENT` operators via `_parse_tokens`."""
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse a unary expression.

        If the current token is a key of `self.UNARY_PARSERS`, the matching parser is invoked;
        otherwise a typed expression is parsed, optionally followed by AT TIME ZONE.
        """
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an interval, a type-prefixed value, a standalone type, or a column.

        Args:
            parse_interval: whether to first try `_parse_interval`.
            fallback_to_identifier: if no type-based parse succeeds, return `_parse_id_var()`
                instead of parsing a column.

        Returns:
            The interval; the result of a `TYPE_LITERAL_PARSERS` entry or an `exp.Cast` when a
            type is followed by a literal; the data type itself when it consumed more than one
            token and carries expressions; otherwise an identifier or a column with its
            trailing operators (possibly None).
        """
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

            self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        """Parse a single parameter of a parenthesized type into an `exp.DataTypeParam`.

        An unqualified column is converted to an upper-cased `exp.Var`. Any var that follows
        (any token is accepted) is stored as the param's `expression`.

        Returns:
            The parameter node, or None if `_parse_type` produced nothing.
        """
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a data type starting at the current token.

        Args:
            check_func: when True, a type that was followed by a parenthesized argument list is
                only accepted if a string comes right after it; otherwise the parser retreats
                to where it started and returns None (presumably because the tokens form a
                function call rather than a type).
            schema: forwarded to nested type parsing. When False, bracketed values after a type
                can make the parser retreat so they are not parsed as part of an ARRAY data type
                (see the comment in the trailing-brackets loop).
            allow_identifiers: when True, a VAR identifier may be re-tokenized and accepted as
                a type keyword or, if the dialect supports them, as a user-defined type name.

        Returns:
            An `exp.DataType`, `exp.PseudoType` or `exp.ObjectIdentifier`; a cast expression
            when a nested type is followed by inline values; the result of a `TYPE_CONVERTERS`
            entry when one applies; or None when no type could be parsed.
        """
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier, exp.Identifier):
                # Re-tokenize the identifier's SQL to find out whether it is a type keyword
                tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect))

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    # Collect the remaining dot-separated parts of the type name
                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    this = exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        # https://materialize.com/docs/sql/types/map/
        if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
            key_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.FARROW):
                self._retreat(index)
                return None

            value_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.R_BRACKET):
                self._retreat(index)
                return None

            return exp.DataType(
                this=exp.DataType.Type.MAP,
                expressions=[key_type, value_type],
                nested=True,
                prefix=prefix,
            )

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        # Parenthesized arguments; how they are parsed depends on the kind of type token
        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                # NULLABLE(<type>) is unwrapped into <type> with its `nullable` arg set
                if type_token == TokenType.NULLABLE and len(expressions) == 1:
                    this = expressions[0]
                    this.set("nullable", True)
                    self._match_r_paren()
                    return this
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR, TokenType.ANY)
                )
                if not func_or_ident:
                    return None
                expressions = [func_or_ident]
                if self._match(TokenType.COMMA):
                    expressions.extend(
                        self._parse_csv(
                            lambda: self._parse_types(
                                check_func=check_func,
                                schema=schema,
                                allow_identifiers=allow_identifiers,
                            )
                        )
                    )
            else:
                expressions = self._parse_csv(self._parse_type_size)

                # https://docs.snowflake.com/en/sql-reference/data-types-vector
                if type_token == TokenType.VECTOR and len(expressions) == 2:
                    expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        values: t.Optional[t.List[exp.Expression]] = None

        # Angle-bracket arguments of nested types, optionally followed by inline values
        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_assignment)
                if not values and is_struct:
                    # Un-consume the opening bracket / paren
                    values = None
                    self._retreat(self._index - 1)
                else:
                    self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)

        if maybe_func and check_func:
            # Peek for a string after the parenthesized type; without one, undo everything
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                prefix=prefix,
            )

            # Empty arrays/structs are allowed
            if values is not None:
                cls = exp.Struct if is_struct else exp.Array
                this = exp.cast(cls(expressions=values), this, copy=False)

        elif expressions:
            this.set("expressions", expressions)

        # https://materialize.com/docs/sql/types/list/#type-name
        while self._match(TokenType.LIST):
            this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True)

        index = self._index

        # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3]
        matched_array = self._match(TokenType.ARRAY)

        while self._curr:
            datatype_token = self._prev.token_type
            matched_l_bracket = self._match(TokenType.L_BRACKET)

            if (not matched_l_bracket and not matched_array) or (
                datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET)
            ):
                # Postgres allows casting empty arrays such as ARRAY[]::INT[],
                # not to be confused with the fixed size array parsing
                break

            matched_array = False
            values = self._parse_csv(self._parse_assignment) or None
            if (
                values
                and not schema
                and (
                    not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY
                )
            ):
                # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB
                # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type
                self._retreat(index)
                break

            this = exp.DataType(
                this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True
            )
            self._match(TokenType.R_BRACKET)

        if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type):
            converter = self.TYPE_CONVERTERS.get(this.this)
            if converter:
                this = converter(t.cast(exp.DataType, this))

        return this

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        """Parse one member of a struct type definition.

        Args:
            type_required: when True and the first parsed element is not a data type and is
                not followed by a type token, the parser retreats and the member is parsed as
                a bare type via `_parse_types()`.

        Returns:
            The result of `_parse_column_def` for the parsed name, or the bare type (see above).
        """
        index = self._index

        if (
            self._curr
            and self._next
            and self._curr.token_type in self.TYPE_TOKENS
            and self._next.token_type in self.TYPE_TOKENS
        ):
            # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a
            # type token. Without this, the list will be parsed as a type and we'll eventually crash
            this = self._parse_id_var()
        else:
            this = (
                self._parse_type(parse_interval=False, fallback_to_identifier=True)
                or self._parse_id_var()
            )

        # A colon between the member name and its type is optional
        self._match(TokenType.COLON)

        if (
            type_required
            and not isinstance(this, exp.DataType)
            and not self._match_set(self.TYPE_TOKENS, advance=False)
        ):
            self._retreat(index)
            return self._parse_types()

        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in an `exp.AtTimeZone` if `AT TIME ZONE <unary>` follows, else return it."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference together with its trailing column operators.

        When no column reference is found, only a bracket expression is attempted. For dialects
        with `SUPPORTS_COLUMN_JOIN_MARKS`, the `join_mark` arg records whether a JOIN_MARKER
        token followed.
        """
        this = self._parse_column_reference()
        column = self._parse_column_ops(this) if this else self._parse_bracket(this)

        if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column:
            column.set("join_mark", self._match(TokenType.JOIN_MARKER))

        return column

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        """Parse a field and, if it is an identifier, wrap it in an `exp.Column`.

        If no field is found and the current token is VALUES not followed by a parenthesis
        (in parsers with `VALUES_FOLLOWED_BY_PAREN` set), VALUES is parsed as an identifier.
        """
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            this = self._parse_id_var()

        if isinstance(this, exp.Identifier):
            # We bubble up comments from the Identifier to the Column
            this = self.expression(exp.Column, comments=this.pop_comments(), this=this)

        return this

    def _parse_colon_as_variant_extract(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Parse `:`-based path extraction applied to `this`.

        Each `:`-introduced segment is recorded as the SQL text spanned by its tokens; the
        segments are joined with "." and converted through `self.dialect.to_json_path`. Casts
        found on the segments are peeled off and re-applied around the resulting
        `exp.JSONExtract`.

        Returns:
            `this` unchanged when no path segment was collected, otherwise the extraction
            (wrapped in the collected casts, if any).
        """
        casts = []
        json_path = []
        escape = None

        while self._match(TokenType.COLON):
            start_index = self._index

            # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True
            path = self._parse_column_ops(
                self._parse_field(any_token=True, tokens=(TokenType.SELECT,))
            )

            # The cast :: operator has a lower precedence than the extraction operator :, so
            # we rearrange the AST appropriately to avoid casting the JSON path
            while isinstance(path, exp.Cast):
                casts.append(path.to)
                path = path.this

            if casts:
                # The path's text ends right before the first DCOLON token after the colon
                dcolon_offset = next(
                    i
                    for i, t in enumerate(self._tokens[start_index:])
                    if t.token_type == TokenType.DCOLON
                )
                end_token = self._tokens[start_index + dcolon_offset - 1]
            else:
                end_token = self._prev

            if path:
                # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as
                # it'll roundtrip to a string literal in GET_PATH
                if isinstance(path, exp.Identifier) and path.quoted:
                    escape = True

                json_path.append(self._find_sql(self._tokens[start_index], end_token))

        # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while
        # Databricks transforms it back to the colon/dot notation
        if json_path:
            json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path)))

            if json_path_expr:
                json_path_expr.set("escape", escape)

            this = self.expression(
                exp.JSONExtract,
                this=this,
                expression=json_path_expr,
                variant_extract=True,
            )

            # casts.pop() takes from the end, so the cast collected first ends up outermost
            while casts:
                this = self.expression(exp.Cast, this=this, to=casts.pop())

        return this

    def _parse_dcolon(self) -> t.Optional[exp.Expression]:
        """Parse the operand of a DCOLON / DOTCOLON column operator; here, a data type."""
        return self._parse_types()

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the chain of brackets and column operators that follows `this`.

        For every token in `self.COLUMN_OPERATORS` the right-hand side is parsed and combined
        with `this`: through the operator's registered callable when there is one, by adding
        one more qualifier level to an `exp.Column` that has no catalog yet, or as an
        `exp.Dot`. When `COLON_IS_VARIANT_EXTRACT` is set, colon extraction is parsed last.
        """
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token in (TokenType.DCOLON, TokenType.DOTCOLON):
                field = self._parse_dcolon()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference() or self._parse_bracket()
                if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False):
                    field = self._parse_column_ops(field)
            else:
                field = self._parse_field(any_token=True, anonymous_func=True)

            if isinstance(field, (exp.Func, exp.Window)) and this:
                # BQ & snowflake allow function calls like x.y.count(...), SAFE.SUBSTR(...) etc
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = exp.replace_tree(
                    this,
                    lambda n: (
                        self.expression(exp.Dot, this=n.args.get("table"), expression=n.this)
                        if n.table
                        else n.this
                    )
                    if isinstance(n, exp.Column)
                    else n,
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift the existing parts one level up: column -> table -> db -> catalog
                this = self.expression(
                    exp.Column,
                    comments=this.comments,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            elif isinstance(field, exp.Window):
                # Move the exp.Dot's to the window's function
                window_func = self.expression(exp.Dot, this=this, expression=field.this)
                field.set("this", window_func)
                this = field
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            if field and field.comments:
                t.cast(exp.Expression, this).add_comments(field.pop_comments())

            this = self._parse_bracket(this)

        return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression.

        Handles, in order: tokens registered in `self.PRIMARY_PARSERS` (consecutive STRING
        tokens are merged into an `exp.Concat`), a number written with a leading dot, and a
        parenthesized construct (subquery, tuple or `exp.Paren`).

        Returns:
            The parsed expression, or None if the current token starts none of the above.
        """
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if not this and self._match(TokenType.R_PAREN, advance=False):
                # Empty parentheses become an empty tuple
                this = self.expression(exp.Tuple)
            elif isinstance(this, exp.UNWRAPPED_QUERIES):
                this = self._parse_subquery(this=this, parse_alias=False)
            elif isinstance(this, exp.Subquery):
                this = self._parse_subquery(
                    this=self._parse_set_operations(this), parse_alias=False
                )
            elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a primary expression, a function call, or an identifier/var.

        Args:
            any_token: forwarded to `_parse_function` and `_parse_id_var`.
            tokens: forwarded to `_parse_id_var`.
            anonymous_func: when True, a function call is attempted before a primary expression
                and is parsed with `anonymous=True`; otherwise the primary is attempted first.
        """
        if anonymous_func:
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
                anonymous=anonymous_func, any_token=any_token
            )
        return field or self._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, optionally wrapped in the `{fn <function>}` escape syntax.

        All arguments are forwarded unchanged to `_parse_function_call`.
        """
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call whose name is the current token.

        Args:
            functions: mapping from upper-cased function name to builder; defaults to
                `self.FUNCTIONS`.
            anonymous: when True, `FUNCTION_PARSERS` and the builders are bypassed and the call
                is built as an `exp.Anonymous`.
            optional_parens: whether functions that are written without parentheses
                (`NO_PAREN_FUNCTION_PARSERS`, `NO_PAREN_FUNCTIONS`) may be parsed.
            any_token: when True, any non-reserved token is accepted as the function name;
                otherwise the token must be in `self.FUNC_TOKENS`.

        Returns:
            The parsed function, passed through `_parse_window`, or None if the current
            position does not start a function call.
        """
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening parenthesis
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(
                    subquery_predicate, comments=comments, this=self._parse_select()
                )
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            known_function = function and not anonymous

            alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            post_func_comments = self._curr and self._curr.comments
            if known_function and post_func_comments:
                # If the user-inputted comment "/* sqlglot.anonymous */" is following the function
                # call we'll construct it as exp.Anonymous, even if it's "known"
                if any(
                    comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS)
                    for comment in post_func_comments
                ):
                    known_function = False

            if alias and known_function:
                args = self._kv_to_prop_eq(args)

            if known_function:
                func_builder = t.cast(t.Callable, function)

                # Builders that declare a `dialect` variable receive the current dialect
                if "dialect" in func_builder.__code__.co_varnames:
                    func = func_builder(args, dialect=self.dialect)
                else:
                    func = func_builder(args)

                func = self.validate_expression(func, args)
                if self.dialect.PRESERVE_ORIGINAL_NAMES:
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True)
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression:
        """Convert an argument that is not a key-value definition; returns it unchanged here.

        Called by `_kv_to_prop_eq` with the argument and its position in the argument list.
        """
        return expression

    def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
        """Normalize key-value style arguments into `exp.PropertyEQ` nodes.

        Instances of `self.KEY_VALUE_DEFINITIONS` are rewritten (an alias becomes the key of
        its aliased expression; other kinds use the name of their left side as the key), and a
        column used as a key is replaced by its identifier. All other arguments go through
        `_to_prop_eq`.
        """
        transformed = []

        for index, e in enumerate(expressions):
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression
                    )

                if isinstance(e.this, exp.Column):
                    e.this.replace(e.this.this)
            else:
                e = self._to_prop_eq(e, index)

            transformed.append(e)

        return transformed

    def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]:
        """Delegate to `_parse_statement`."""
        return self._parse_statement()

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse a function parameter as a column definition (`computed_column=False`)."""
        return self._parse_column_def(this=self._parse_id_var(), computed_column=False)

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly qualified) function name with an optional parameter list.

        Returns:
            The parsed table parts alone when no parenthesis follows, otherwise an
            `exp.UserDefinedFunction` with the parsed parameters and `wrapped=True`.
            `kind` is not used by this implementation.
        """
        this = self._parse_table_parts(schema=True)

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Build an `exp.Introducer` from `token` and the primary expression that follows it.

        If no primary expression follows, the token's text is returned as an identifier.
        """
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter of the form `<name>` or `<kind>.<name>`."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            # What was parsed before the dot is the kind; the name follows it
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        """Parse a single lambda parameter as an identifier/var."""
        return self._parse_id_var()

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a function argument, which may be a lambda.

        A parameter (or parenthesized parameter list) followed by a token in `self.LAMBDAS`
        is handed to the matching lambda parser. Otherwise the parser retreats and reads
        either `DISTINCT <expressions>` or a select/expression, followed by optional
        respect/ignore nulls, HAVING MAX/MIN, ORDER and LIMIT modifiers.

        Args:
            alias: forwarded to `_parse_select_or_expression`.
        """
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_lambda_arg()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda: start over from the first token of the argument
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_assignment)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized list of constraints / field definitions into an `exp.Schema`.

        Returns:
            `this` unchanged when no parenthesis follows or when the parenthesis opens a query
            (in which case the parser retreats), otherwise the schema wrapping `this`.
        """
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this
        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        """Parse a field (any token allowed as its name) followed by its column definition."""
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(
        self, this: t.Optional[exp.Expression], computed_column: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse the type and constraints that follow a column name.

        Args:
            this: the already-parsed column name; an `exp.Column` is unwrapped to its identifier.
            computed_column: when False, an ALIAS token before the type is consumed up front.

        Returns:
            An `exp.ColumnDef`, or `this` unchanged if neither a type nor constraints were found.
        """
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        if not computed_column:
            self._match(TokenType.ALIAS)

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraint_kind = exp.ComputedColumnConstraint(
                this=self._parse_assignment(),
                persisted=persisted or self._match_text_seq("PERSISTED"),
                not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
            )
            constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind))
        elif (
            kind
            and self._match(TokenType.ALIAS, advance=False)
            and (
                not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
                or (self._next and self._next.token_type == TokenType.L_PAREN)
            )
        ):
            self._advance()
            constraints.append(
                self.expression(
                    exp.ColumnConstraint,
                    kind=exp.TransformColumnConstraint(this=self._parse_disjunction()),
                )
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse the optional arguments of an auto-increment constraint.

        Accepts `(<start>, <increment>)` or `START <start> [INCREMENT] <increment>`.

        Returns:
            An `exp.GeneratedAsIdentityColumnConstraint` when both start and increment were
            parsed, otherwise a plain `exp.AutoIncrementColumnConstraint`.
        """
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(
                start=start, increment=increment, this=False
            )

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        """Parse `REFRESH <var>` into an `exp.AutoRefreshProperty`.

        If REFRESH does not follow, the parser steps back one token and returns None.
        """
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse the argument(s) of a compress constraint: a wrapped list or a single expression."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        """Parse what follows the keyword of a generated-column constraint.

        Handles `BY DEFAULT [ON NULL]` / `ALWAYS`, an optional ALIAS token, the
        `ROW START|END [HIDDEN]` form, and `IDENTITY` with an optional parenthesized list of
        sequence options. Without IDENTITY, the parenthesized content is parsed as an
        expression and stored under `expression`.
        """
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                # Positional form: IDENTITY(<start>, <increment>)
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse `[LENGTH] <expression>` into an `exp.InlineLengthColumnConstraint`."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        """Parse the constraint that follows an already-consumed NOT token.

        Recognizes NULL, CASESPECIFIC and FOR REPLICATION. If none matches, the NOT token is
        un-consumed and None is returned.
        """
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)

        # Unconsume the `NOT` token
        self._retreat(self._index - 1)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one column constraint, optionally named via `CONSTRAINT <name>`.

        Returns:
            An `exp.ColumnConstraint` when a keyword from `self.CONSTRAINT_PARSERS` follows
            (unless a WITH token followed by a procedure option comes next); otherwise the
            result of the optional name parse, which is falsy when no CONSTRAINT token matched.
        """
        this = self._match(TokenType.CONSTRAINT) and self._parse_id_var()

        procedure_option_follows = (
            self._match(TokenType.WITH, advance=False)
            and self._next
            and self._next.text.upper() in self.PROCEDURE_OPTIONS
        )

        if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a schema-level constraint.

        With a leading CONSTRAINT token, the name and every following unnamed constraint are
        collected into an `exp.Constraint`; without it, a single unnamed constraint limited to
        `self.SCHEMA_UNNAMED_CONSTRAINTS` is parsed.
        """
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        """Collect consecutive unnamed constraints (or function calls) until neither parses."""
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a constraint identified by its keyword text.

        Args:
            constraints: the keyword texts to accept; defaults to every key of
                `self.CONSTRAINT_PARSERS`.

        Returns:
            The result of the matching constraint parser, or None when the current token is an
            IDENTIFIER token or matches none of the accepted keywords.
        """
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique_key(self) -> t.Optional[exp.Expression]:
        """Parse the optional name that can follow UNIQUE [KEY] as an identifier/var."""
        return self._parse_id_var(any_token=False)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse the remainder of a UNIQUE constraint.

        In order: optional KEY, optional NULLS NOT DISTINCT, an optional key name with a
        schema, optional `USING <token>` (stored as the token's text), and an optional
        conflict clause.
        """
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"),
            this=self._parse_schema(self._parse_unique_key()),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Parse trailing key-constraint options into a list of strings.

        `ON <event> <action>` clauses are rendered as "ON <event> <action>"; other options are
        taken from `self.KEY_CONSTRAINT_OPTIONS`. Parsing stops at the first token that is
        neither.
        """
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The token right after ON is taken verbatim as the event
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            else:
                var = self._parse_var_from_options(
                    self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False
                )
                if not var:
                    break
                options.append(var.name)

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse a REFERENCES clause into an `exp.Reference`.

        Args:
            match: when True, the REFERENCES token must be matched here, and None is returned
                if it is absent; when False, parsing starts directly at the referenced table.
        """
        if match and not self._match(TokenType.REFERENCES):
            return None

        # The referenced columns are parsed as part of the table (schema=True), so the
        # reference's own `expressions` arg is left unset
        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")
def _parse_primary_key(
    self, wrapped_optional: bool = False, in_props: bool = False
) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
    """Parse what follows a PRIMARY KEY keyword.

    Args:
        wrapped_optional: Forwarded as `optional` to `_parse_wrapped_csv` when the
            key parts are parsed.
        in_props: When true, the key-part list is always parsed, even if the next
            token is not an opening paren.

    Returns:
        A `PrimaryKeyColumnConstraint` when `in_props` is false and no opening paren
        follows; otherwise a `PrimaryKey` holding the parsed parts and any key
        constraint options.
    """
    # An optional ASC/DESC may precede the column list; only DESC is recorded.
    desc = self._match_set((TokenType.ASC, TokenType.DESC))
    if desc:
        desc = self._prev.token_type == TokenType.DESC

    if in_props or self._match(TokenType.L_PAREN, advance=False):
        parts = self._parse_wrapped_csv(self._parse_primary_key_part, optional=wrapped_optional)
        key_options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=parts, options=key_options)

    return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)
def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
    """Parse a `[...]` or `{...}` construct, optionally applied to `this`.

    Depending on the opening token and on `this`, the result is:

    - an ODBC datetime literal, when `{` is followed by a VAR token listed in
      `ODBC_DATETIME_LITERALS`;
    - an `exp.Struct`, for any other `{...}`;
    - an array built by `build_array_constructor`, when there is no `this` or when
      `this` names an entry of `ARRAY_CONSTRUCTORS`;
    - an `exp.Bracket` on `this` otherwise, with the dialect's index offset applied.

    Except for the ODBC literal and the named array constructor, the method then
    recurses so that chained brackets are consumed as well.

    Returns:
        `this` unchanged when the next token opens neither a bracket nor a brace,
        otherwise the parsed expression.
    """
    if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
        return this

    bracket_kind = self._prev.token_type
    is_brace = bracket_kind == TokenType.L_BRACE
    is_bracket = bracket_kind == TokenType.L_BRACKET

    if is_brace:
        upcoming = self._curr
        if (
            upcoming
            and upcoming.token_type == TokenType.VAR
            and upcoming.text.lower() in self.ODBC_DATETIME_LITERALS
        ):
            return self._parse_odbc_datetime_literal()

    items = self._parse_csv(lambda: self._parse_bracket_key_value(is_map=is_brace))

    if is_bracket and not self._match(TokenType.R_BRACKET):
        self.raise_error("Expected ]")
    elif is_brace and not self._match(TokenType.R_BRACE):
        self.raise_error("Expected }")

    if is_brace:
        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(items))
    elif this:
        constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
        if constructor_type:
            return build_array_constructor(
                constructor_type,
                args=items,
                bracket_kind=bracket_kind,
                dialect=self.dialect,
            )

        shifted = apply_index_offset(
            this, items, -self.dialect.INDEX_OFFSET, dialect=self.dialect
        )
        this = self.expression(exp.Bracket, this=this, expressions=shifted)
    else:
        this = build_array_constructor(
            exp.Array, args=items, bracket_kind=bracket_kind, dialect=self.dialect
        )

    self._add_comments(this)
    return self._parse_bracket(this)
def _parse_extract(self) -> exp.Extract | exp.Anonymous:
    """Parse the arguments of EXTRACT in either `part FROM expr` or `part, expr` form.

    The part is parsed as a function call or, failing that, as an upper-cased
    variable/string. If neither `FROM` nor a comma follows it, the problem is
    reported through `raise_error`; parsing of the second operand is attempted
    regardless, so lenient error levels still produce an `exp.Extract`.
    """
    part = self._parse_function() or self._parse_var_or_string(upper=True)

    # FROM is tried first; the comma form is only considered when FROM is absent.
    if not self._match(TokenType.FROM) and not self._match(TokenType.COMMA):
        self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

    return self.expression(exp.Extract, this=part, expression=self._parse_bitwise())
def _parse_string_agg(self) -> exp.GroupConcat:
    """Parse the arguments of a STRING_AGG / LISTAGG style call into `exp.GroupConcat`.

    Handles an optional leading DISTINCT, an optional `ON OVERFLOW` clause, an
    ORDER BY (and LIMIT) placed inside the call, and a trailing
    `WITHIN GROUP (ORDER BY ...)`. In both ordered forms the ordering wraps the
    first argument.
    """
    args: t.List[t.Optional[exp.Expression]]
    if not self._match(TokenType.DISTINCT):
        args = self._parse_csv(self._parse_assignment)  # type: ignore
    else:
        distinct = self.expression(exp.Distinct, expressions=[self._parse_assignment()])
        args = [distinct]
        if self._match(TokenType.COMMA):
            args.extend(self._parse_csv(self._parse_assignment))

    on_overflow: t.Optional[exp.Expression] = None
    if self._match_text_seq("ON", "OVERFLOW"):
        # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior])
        if self._match_text_seq("ERROR"):
            on_overflow = exp.var("ERROR")
        else:
            self._match_text_seq("TRUNCATE")
            filler = self._parse_string()
            # The count is included unless WITHOUT COUNT is spelled out
            with_count = self._match_text_seq("WITH", "COUNT") or not self._match_text_seq(
                "WITHOUT", "COUNT"
            )
            on_overflow = self.expression(
                exp.OverflowTruncateBehavior, this=filler, with_count=with_count
            )

    checkpoint = self._index
    if not self._match(TokenType.R_PAREN) and args:
        # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
        # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
        # The order is parsed through `this` as a canonicalization for WITHIN GROUPs
        args[0] = self._parse_limit(this=self._parse_order(this=args[0]))
        return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

    # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
    # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
    # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
    if self._match_text_seq("WITHIN", "GROUP"):
        # The corresponding match_r_paren will be called in parse_function (caller)
        self._match_l_paren()

        return self.expression(
            exp.GroupConcat,
            this=self._parse_order(this=seq_get(args, 0)),
            separator=seq_get(args, 1),
            on_overflow=on_overflow,
        )

    # No WITHIN GROUP: rewind to the saved index and build the call from the plain arguments
    self._retreat(checkpoint)
    return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)
6416 else: 6417 uri = self._parse_alias(self._parse_string()) 6418 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6419 if not self._match(TokenType.COMMA): 6420 break 6421 6422 return namespaces 6423 6424 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 6425 """ 6426 There are generally two variants of the DECODE function: 6427 6428 - DECODE(bin, charset) 6429 - DECODE(expression, search, result [, search, result] ... [, default]) 6430 6431 The second variant will always be parsed into a CASE expression. Note that NULL 6432 needs special treatment, since we need to explicitly check for it with `IS NULL`, 6433 instead of relying on pattern matching. 6434 """ 6435 args = self._parse_csv(self._parse_assignment) 6436 6437 if len(args) < 3: 6438 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6439 6440 expression, *expressions = args 6441 if not expression: 6442 return None 6443 6444 ifs = [] 6445 for search, result in zip(expressions[::2], expressions[1::2]): 6446 if not search or not result: 6447 return None 6448 6449 if isinstance(search, exp.Literal): 6450 ifs.append( 6451 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 6452 ) 6453 elif isinstance(search, exp.Null): 6454 ifs.append( 6455 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 6456 ) 6457 else: 6458 cond = exp.or_( 6459 exp.EQ(this=expression.copy(), expression=search), 6460 exp.and_( 6461 exp.Is(this=expression.copy(), expression=exp.Null()), 6462 exp.Is(this=search.copy(), expression=exp.Null()), 6463 copy=False, 6464 ), 6465 copy=False, 6466 ) 6467 ifs.append(exp.If(this=cond, true=result)) 6468 6469 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6470 6471 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6472 self._match_text_seq("KEY") 6473 key = self._parse_column() 6474 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 
6475 self._match_text_seq("VALUE") 6476 value = self._parse_bitwise() 6477 6478 if not key and not value: 6479 return None 6480 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6481 6482 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6483 if not this or not self._match_text_seq("FORMAT", "JSON"): 6484 return this 6485 6486 return self.expression(exp.FormatJson, this=this) 6487 6488 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6489 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6490 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6491 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6492 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6493 else: 6494 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6495 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6496 6497 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6498 6499 if not empty and not error and not null: 6500 return None 6501 6502 return self.expression( 6503 exp.OnCondition, 6504 empty=empty, 6505 error=error, 6506 null=null, 6507 ) 6508 6509 def _parse_on_handling( 6510 self, on: str, *values: str 6511 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6512 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6513 for value in values: 6514 if self._match_text_seq(value, "ON", on): 6515 return f"{value} ON {on}" 6516 6517 index = self._index 6518 if self._match(TokenType.DEFAULT): 6519 default_value = self._parse_bitwise() 6520 if self._match_text_seq("ON", on): 6521 return default_value 6522 6523 self._retreat(index) 6524 6525 return None 6526 6527 @t.overload 6528 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6529 6530 @t.overload 6531 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
# Note: this is currently incomplete; it only implements the "JSON_value_column" part
def _parse_json_column_def(self) -> exp.JSONColumnDef:
    """Parse one column definition of a JSON schema.

    A definition either starts with `NESTED`, in which case it has no name or type
    and a nested schema is parsed after the optional path, or it consists of a name
    followed by a type. In both cases an optional `PATH <string>` may follow.
    """
    if self._match_text_seq("NESTED"):
        this, kind, nested = None, None, True
    else:
        this = self._parse_id_var()
        kind = self._parse_types(allow_identifiers=False)
        nested = None

    path = self._match_text_seq("PATH") and self._parse_string()
    # The nested schema is only parsed for NESTED definitions
    nested_schema = nested and self._parse_json_schema()

    return self.expression(
        exp.JSONColumnDef,
        this=this,
        kind=kind,
        path=path,
        nested_schema=nested_schema,
    )
def _parse_match_against(self) -> exp.MatchAgainst:
    """Parse `MATCH (col, ...) AGAINST (<string> [modifier])` from inside the first paren.

    The column list is read first, then the `) AGAINST (` sequence is matched, then
    the search string and an optional search modifier. The modifier is stored as one
    of the strings `IN NATURAL LANGUAGE MODE`,
    `IN NATURAL LANGUAGE MODE WITH QUERY EXPANSION`, `IN BOOLEAN MODE` or
    `WITH QUERY EXPANSION`, or None when absent.
    """
    columns = self._parse_csv(self._parse_column)

    self._match_text_seq(")", "AGAINST", "(")

    search = self._parse_string()

    modifier = None
    if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
        modifier = "IN NATURAL LANGUAGE MODE"
        if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier += " WITH QUERY EXPANSION"
    elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
        modifier = "IN BOOLEAN MODE"
    elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
        modifier = "WITH QUERY EXPANSION"

    return self.expression(
        exp.MatchAgainst, this=search, expressions=columns, modifier=modifier
    )
def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
    """Parse the arguments of a POSITION-like call into `exp.StrPosition`.

    Supports `needle IN haystack` as well as the comma-separated form, where an
    optional third argument is stored as the start position.

    Args:
        haystack_first: In the comma-separated form, whether the string being
            searched comes before the substring instead of after it.
    """
    args = self._parse_csv(self._parse_bitwise)

    if self._match(TokenType.IN):
        return self.expression(
            exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
        )

    haystack_idx, needle_idx = (0, 1) if haystack_first else (1, 0)

    return self.expression(
        exp.StrPosition,
        this=seq_get(args, haystack_idx),
        substr=seq_get(args, needle_idx),
        position=seq_get(args, 2),
    )
def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    """Wrap `this` in `exp.HavingMax` when a `HAVING {MAX | MIN} <column>` suffix follows.

    The node's `max` flag is false only when the keyword consumed last is `MIN`.

    Returns:
        `this` unchanged when no HAVING token follows, otherwise the `HavingMax` node.
    """
    if not self._match(TokenType.HAVING):
        return this

    self._match_texts(("MAX", "MIN"))
    is_max = self._prev.text.upper() != "MIN"
    return self.expression(
        exp.HavingMax, this=this, expression=self._parse_column(), max=is_max
    )
6762 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6763 if self._match_text_seq("WITHIN", "GROUP"): 6764 order = self._parse_wrapped(self._parse_order) 6765 this = self.expression(exp.WithinGroup, this=this, expression=order) 6766 6767 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6768 self._match(TokenType.WHERE) 6769 this = self.expression( 6770 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6771 ) 6772 self._match_r_paren() 6773 6774 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6775 # Some dialects choose to implement and some do not. 6776 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6777 6778 # There is some code above in _parse_lambda that handles 6779 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6780 6781 # The below changes handle 6782 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6783 6784 # Oracle allows both formats 6785 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6786 # and Snowflake chose to do the same for familiarity 6787 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6788 if isinstance(this, exp.AggFunc): 6789 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6790 6791 if ignore_respect and ignore_respect is not this: 6792 ignore_respect.replace(ignore_respect.this) 6793 this = self.expression(ignore_respect.__class__, this=this) 6794 6795 this = self._parse_respect_or_ignore_nulls(this) 6796 6797 # bigquery select from window x AS (partition by ...) 
6798 if alias: 6799 over = None 6800 self._match(TokenType.ALIAS) 6801 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6802 return this 6803 else: 6804 over = self._prev.text.upper() 6805 6806 if comments and isinstance(func, exp.Expression): 6807 func.pop_comments() 6808 6809 if not self._match(TokenType.L_PAREN): 6810 return self.expression( 6811 exp.Window, 6812 comments=comments, 6813 this=this, 6814 alias=self._parse_id_var(False), 6815 over=over, 6816 ) 6817 6818 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6819 6820 first = self._match(TokenType.FIRST) 6821 if self._match_text_seq("LAST"): 6822 first = False 6823 6824 partition, order = self._parse_partition_and_order() 6825 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6826 6827 if kind: 6828 self._match(TokenType.BETWEEN) 6829 start = self._parse_window_spec() 6830 self._match(TokenType.AND) 6831 end = self._parse_window_spec() 6832 6833 spec = self.expression( 6834 exp.WindowSpec, 6835 kind=kind, 6836 start=start["value"], 6837 start_side=start["side"], 6838 end=end["value"], 6839 end_side=end["side"], 6840 ) 6841 else: 6842 spec = None 6843 6844 self._match_r_paren() 6845 6846 window = self.expression( 6847 exp.Window, 6848 comments=comments, 6849 this=this, 6850 partition_by=partition, 6851 order=order, 6852 spec=spec, 6853 alias=window_alias, 6854 over=over, 6855 first=first, 6856 ) 6857 6858 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
6859 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6860 return self._parse_window(window, alias=alias) 6861 6862 return window 6863 6864 def _parse_partition_and_order( 6865 self, 6866 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6867 return self._parse_partition_by(), self._parse_order() 6868 6869 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6870 self._match(TokenType.BETWEEN) 6871 6872 return { 6873 "value": ( 6874 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6875 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6876 or self._parse_bitwise() 6877 ), 6878 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6879 } 6880 6881 def _parse_alias( 6882 self, this: t.Optional[exp.Expression], explicit: bool = False 6883 ) -> t.Optional[exp.Expression]: 6884 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 6885 # so this section tries to parse the clause version and if it fails, it treats the token 6886 # as an identifier (alias) 6887 if self._can_parse_limit_or_offset(): 6888 return this 6889 6890 any_token = self._match(TokenType.ALIAS) 6891 comments = self._prev_comments or [] 6892 6893 if explicit and not any_token: 6894 return this 6895 6896 if self._match(TokenType.L_PAREN): 6897 aliases = self.expression( 6898 exp.Aliases, 6899 comments=comments, 6900 this=this, 6901 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6902 ) 6903 self._match_r_paren(aliases) 6904 return aliases 6905 6906 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6907 self.STRING_ALIASES and self._parse_string_as_identifier() 6908 ) 6909 6910 if alias: 6911 comments.extend(alias.pop_comments()) 6912 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6913 column = this.this 6914 6915 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6916 if not this.comments and 
column and column.comments: 6917 this.comments = column.pop_comments() 6918 6919 return this 6920 6921 def _parse_id_var( 6922 self, 6923 any_token: bool = True, 6924 tokens: t.Optional[t.Collection[TokenType]] = None, 6925 ) -> t.Optional[exp.Expression]: 6926 expression = self._parse_identifier() 6927 if not expression and ( 6928 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6929 ): 6930 quoted = self._prev.token_type == TokenType.STRING 6931 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6932 6933 return expression 6934 6935 def _parse_string(self) -> t.Optional[exp.Expression]: 6936 if self._match_set(self.STRING_PARSERS): 6937 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6938 return self._parse_placeholder() 6939 6940 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6941 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6942 6943 def _parse_number(self) -> t.Optional[exp.Expression]: 6944 if self._match_set(self.NUMERIC_PARSERS): 6945 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6946 return self._parse_placeholder() 6947 6948 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6949 if self._match(TokenType.IDENTIFIER): 6950 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6951 return self._parse_placeholder() 6952 6953 def _parse_var( 6954 self, 6955 any_token: bool = False, 6956 tokens: t.Optional[t.Collection[TokenType]] = None, 6957 upper: bool = False, 6958 ) -> t.Optional[exp.Expression]: 6959 if ( 6960 (any_token and self._advance_any()) 6961 or self._match(TokenType.VAR) 6962 or (self._match_set(tokens) if tokens else False) 6963 ): 6964 return self.expression( 6965 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6966 ) 6967 return self._parse_placeholder() 6968 6969 def _advance_any(self, ignore_reserved: bool = False) -> 
t.Optional[Token]: 6970 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6971 self._advance() 6972 return self._prev 6973 return None 6974 6975 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6976 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6977 6978 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6979 return self._parse_primary() or self._parse_var(any_token=True) 6980 6981 def _parse_null(self) -> t.Optional[exp.Expression]: 6982 if self._match_set(self.NULL_TOKENS): 6983 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6984 return self._parse_placeholder() 6985 6986 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6987 if self._match(TokenType.TRUE): 6988 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6989 if self._match(TokenType.FALSE): 6990 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6991 return self._parse_placeholder() 6992 6993 def _parse_star(self) -> t.Optional[exp.Expression]: 6994 if self._match(TokenType.STAR): 6995 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6996 return self._parse_placeholder() 6997 6998 def _parse_parameter(self) -> exp.Parameter: 6999 this = self._parse_identifier() or self._parse_primary_or_var() 7000 return self.expression(exp.Parameter, this=this) 7001 7002 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7003 if self._match_set(self.PLACEHOLDER_PARSERS): 7004 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7005 if placeholder: 7006 return placeholder 7007 self._advance(-1) 7008 return None 7009 7010 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7011 if not self._match_texts(keywords): 7012 return None 7013 if self._match(TokenType.L_PAREN, advance=False): 7014 return self._parse_wrapped_csv(self._parse_expression) 7015 7016 expression = self._parse_expression() 7017 
return [expression] if expression else None 7018 7019 def _parse_csv( 7020 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7021 ) -> t.List[exp.Expression]: 7022 parse_result = parse_method() 7023 items = [parse_result] if parse_result is not None else [] 7024 7025 while self._match(sep): 7026 self._add_comments(parse_result) 7027 parse_result = parse_method() 7028 if parse_result is not None: 7029 items.append(parse_result) 7030 7031 return items 7032 7033 def _parse_tokens( 7034 self, parse_method: t.Callable, expressions: t.Dict 7035 ) -> t.Optional[exp.Expression]: 7036 this = parse_method() 7037 7038 while self._match_set(expressions): 7039 this = self.expression( 7040 expressions[self._prev.token_type], 7041 this=this, 7042 comments=self._prev_comments, 7043 expression=parse_method(), 7044 ) 7045 7046 return this 7047 7048 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7049 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7050 7051 def _parse_wrapped_csv( 7052 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 7053 ) -> t.List[exp.Expression]: 7054 return self._parse_wrapped( 7055 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7056 ) 7057 7058 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7059 wrapped = self._match(TokenType.L_PAREN) 7060 if not wrapped and not optional: 7061 self.raise_error("Expecting (") 7062 parse_result = parse_method() 7063 if wrapped: 7064 self._match_r_paren() 7065 return parse_result 7066 7067 def _parse_expressions(self) -> t.List[exp.Expression]: 7068 return self._parse_csv(self._parse_expression) 7069 7070 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7071 return self._parse_select() or self._parse_set_operations( 7072 self._parse_alias(self._parse_assignment(), explicit=True) 7073 if alias 7074 else 
self._parse_assignment() 7075 ) 7076 7077 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7078 return self._parse_query_modifiers( 7079 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7080 ) 7081 7082 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7083 this = None 7084 if self._match_texts(self.TRANSACTION_KIND): 7085 this = self._prev.text 7086 7087 self._match_texts(("TRANSACTION", "WORK")) 7088 7089 modes = [] 7090 while True: 7091 mode = [] 7092 while self._match(TokenType.VAR): 7093 mode.append(self._prev.text) 7094 7095 if mode: 7096 modes.append(" ".join(mode)) 7097 if not self._match(TokenType.COMMA): 7098 break 7099 7100 return self.expression(exp.Transaction, this=this, modes=modes) 7101 7102 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7103 chain = None 7104 savepoint = None 7105 is_rollback = self._prev.token_type == TokenType.ROLLBACK 7106 7107 self._match_texts(("TRANSACTION", "WORK")) 7108 7109 if self._match_text_seq("TO"): 7110 self._match_text_seq("SAVEPOINT") 7111 savepoint = self._parse_id_var() 7112 7113 if self._match(TokenType.AND): 7114 chain = not self._match_text_seq("NO") 7115 self._match_text_seq("CHAIN") 7116 7117 if is_rollback: 7118 return self.expression(exp.Rollback, savepoint=savepoint) 7119 7120 return self.expression(exp.Commit, chain=chain) 7121 7122 def _parse_refresh(self) -> exp.Refresh: 7123 self._match(TokenType.TABLE) 7124 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7125 7126 def _parse_add_column(self) -> t.Optional[exp.Expression]: 7127 if not self._match_text_seq("ADD"): 7128 return None 7129 7130 self._match(TokenType.COLUMN) 7131 exists_column = self._parse_exists(not_=True) 7132 expression = self._parse_field_def() 7133 7134 if expression: 7135 expression.set("exists", exists_column) 7136 7137 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 
7138 if self._match_texts(("FIRST", "AFTER")): 7139 position = self._prev.text 7140 column_position = self.expression( 7141 exp.ColumnPosition, this=self._parse_column(), position=position 7142 ) 7143 expression.set("position", column_position) 7144 7145 return expression 7146 7147 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7148 drop = self._match(TokenType.DROP) and self._parse_drop() 7149 if drop and not isinstance(drop, exp.Command): 7150 drop.set("kind", drop.args.get("kind", "COLUMN")) 7151 return drop 7152 7153 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7154 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7155 return self.expression( 7156 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7157 ) 7158 7159 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7160 index = self._index - 1 7161 7162 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7163 return self._parse_csv( 7164 lambda: self.expression( 7165 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7166 ) 7167 ) 7168 7169 self._retreat(index) 7170 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 7171 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 7172 7173 if self._match_text_seq("ADD", "COLUMNS"): 7174 schema = self._parse_schema() 7175 if schema: 7176 return [schema] 7177 return [] 7178 7179 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 7180 7181 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7182 if self._match_texts(self.ALTER_ALTER_PARSERS): 7183 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7184 7185 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7186 # keyword after ALTER we default to parsing this statement 7187 self._match(TokenType.COLUMN) 7188 column = 
self._parse_field(any_token=True) 7189 7190 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7191 return self.expression(exp.AlterColumn, this=column, drop=True) 7192 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7193 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7194 if self._match(TokenType.COMMENT): 7195 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7196 if self._match_text_seq("DROP", "NOT", "NULL"): 7197 return self.expression( 7198 exp.AlterColumn, 7199 this=column, 7200 drop=True, 7201 allow_null=True, 7202 ) 7203 if self._match_text_seq("SET", "NOT", "NULL"): 7204 return self.expression( 7205 exp.AlterColumn, 7206 this=column, 7207 allow_null=False, 7208 ) 7209 7210 if self._match_text_seq("SET", "VISIBLE"): 7211 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7212 if self._match_text_seq("SET", "INVISIBLE"): 7213 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7214 7215 self._match_text_seq("SET", "DATA") 7216 self._match_text_seq("TYPE") 7217 return self.expression( 7218 exp.AlterColumn, 7219 this=column, 7220 dtype=self._parse_types(), 7221 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7222 using=self._match(TokenType.USING) and self._parse_assignment(), 7223 ) 7224 7225 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7226 if self._match_texts(("ALL", "EVEN", "AUTO")): 7227 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7228 7229 self._match_text_seq("KEY", "DISTKEY") 7230 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7231 7232 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7233 if compound: 7234 self._match_text_seq("SORTKEY") 7235 7236 if self._match(TokenType.L_PAREN, advance=False): 7237 return self.expression( 7238 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), 
compound=compound 7239 ) 7240 7241 self._match_texts(("AUTO", "NONE")) 7242 return self.expression( 7243 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7244 ) 7245 7246 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7247 index = self._index - 1 7248 7249 partition_exists = self._parse_exists() 7250 if self._match(TokenType.PARTITION, advance=False): 7251 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7252 7253 self._retreat(index) 7254 return self._parse_csv(self._parse_drop_column) 7255 7256 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7257 if self._match(TokenType.COLUMN): 7258 exists = self._parse_exists() 7259 old_column = self._parse_column() 7260 to = self._match_text_seq("TO") 7261 new_column = self._parse_column() 7262 7263 if old_column is None or to is None or new_column is None: 7264 return None 7265 7266 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7267 7268 self._match_text_seq("TO") 7269 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7270 7271 def _parse_alter_table_set(self) -> exp.AlterSet: 7272 alter_set = self.expression(exp.AlterSet) 7273 7274 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7275 "TABLE", "PROPERTIES" 7276 ): 7277 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7278 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7279 alter_set.set("expressions", [self._parse_assignment()]) 7280 elif self._match_texts(("LOGGED", "UNLOGGED")): 7281 alter_set.set("option", exp.var(self._prev.text.upper())) 7282 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7283 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7284 elif self._match_text_seq("LOCATION"): 7285 alter_set.set("location", self._parse_field()) 7286 elif 
self._match_text_seq("ACCESS", "METHOD"): 7287 alter_set.set("access_method", self._parse_field()) 7288 elif self._match_text_seq("TABLESPACE"): 7289 alter_set.set("tablespace", self._parse_field()) 7290 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7291 alter_set.set("file_format", [self._parse_field()]) 7292 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7293 alter_set.set("file_format", self._parse_wrapped_options()) 7294 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7295 alter_set.set("copy_options", self._parse_wrapped_options()) 7296 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7297 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7298 else: 7299 if self._match_text_seq("SERDE"): 7300 alter_set.set("serde", self._parse_field()) 7301 7302 alter_set.set("expressions", [self._parse_properties()]) 7303 7304 return alter_set 7305 7306 def _parse_alter(self) -> exp.Alter | exp.Command: 7307 start = self._prev 7308 7309 alter_token = self._match_set(self.ALTERABLES) and self._prev 7310 if not alter_token: 7311 return self._parse_as_command(start) 7312 7313 exists = self._parse_exists() 7314 only = self._match_text_seq("ONLY") 7315 this = self._parse_table(schema=True) 7316 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7317 7318 if self._next: 7319 self._advance() 7320 7321 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7322 if parser: 7323 actions = ensure_list(parser(self)) 7324 not_valid = self._match_text_seq("NOT", "VALID") 7325 options = self._parse_csv(self._parse_property) 7326 7327 if not self._curr and actions: 7328 return self.expression( 7329 exp.Alter, 7330 this=this, 7331 kind=alter_token.text.upper(), 7332 exists=exists, 7333 actions=actions, 7334 only=only, 7335 options=options, 7336 cluster=cluster, 7337 not_valid=not_valid, 7338 ) 7339 7340 return self._parse_as_command(start) 7341 7342 def _parse_analyze(self) -> 
exp.Analyze | exp.Command: 7343 start = self._prev 7344 # https://duckdb.org/docs/sql/statements/analyze 7345 if not self._curr: 7346 return self.expression(exp.Analyze) 7347 7348 options = [] 7349 while self._match_texts(self.ANALYZE_STYLES): 7350 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7351 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7352 else: 7353 options.append(self._prev.text.upper()) 7354 7355 this: t.Optional[exp.Expression] = None 7356 inner_expression: t.Optional[exp.Expression] = None 7357 7358 kind = self._curr and self._curr.text.upper() 7359 7360 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7361 this = self._parse_table_parts() 7362 elif self._match_text_seq("TABLES"): 7363 if self._match_set((TokenType.FROM, TokenType.IN)): 7364 kind = f"{kind} {self._prev.text.upper()}" 7365 this = self._parse_table(schema=True, is_db_reference=True) 7366 elif self._match_text_seq("DATABASE"): 7367 this = self._parse_table(schema=True, is_db_reference=True) 7368 elif self._match_text_seq("CLUSTER"): 7369 this = self._parse_table() 7370 # Try matching inner expr keywords before fallback to parse table. 
7371 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7372 kind = None 7373 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7374 else: 7375 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7376 kind = None 7377 this = self._parse_table_parts() 7378 7379 partition = self._try_parse(self._parse_partition) 7380 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7381 return self._parse_as_command(start) 7382 7383 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7384 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7385 "WITH", "ASYNC", "MODE" 7386 ): 7387 mode = f"WITH {self._tokens[self._index-2].text.upper()} MODE" 7388 else: 7389 mode = None 7390 7391 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7392 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7393 7394 properties = self._parse_properties() 7395 return self.expression( 7396 exp.Analyze, 7397 kind=kind, 7398 this=this, 7399 mode=mode, 7400 partition=partition, 7401 properties=properties, 7402 expression=inner_expression, 7403 options=options, 7404 ) 7405 7406 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7407 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7408 this = None 7409 kind = self._prev.text.upper() 7410 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7411 expressions = [] 7412 7413 if not self._match_text_seq("STATISTICS"): 7414 self.raise_error("Expecting token STATISTICS") 7415 7416 if self._match_text_seq("NOSCAN"): 7417 this = "NOSCAN" 7418 elif self._match(TokenType.FOR): 7419 if self._match_text_seq("ALL", "COLUMNS"): 7420 this = "FOR ALL COLUMNS" 7421 if self._match_texts("COLUMNS"): 7422 this = "FOR COLUMNS" 7423 expressions = self._parse_csv(self._parse_column_reference) 7424 elif self._match_text_seq("SAMPLE"): 7425 sample = self._parse_number() 7426 
expressions = [ 7427 self.expression( 7428 exp.AnalyzeSample, 7429 sample=sample, 7430 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7431 ) 7432 ] 7433 7434 return self.expression( 7435 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7436 ) 7437 7438 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7439 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7440 kind = None 7441 this = None 7442 expression: t.Optional[exp.Expression] = None 7443 if self._match_text_seq("REF", "UPDATE"): 7444 kind = "REF" 7445 this = "UPDATE" 7446 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7447 this = "UPDATE SET DANGLING TO NULL" 7448 elif self._match_text_seq("STRUCTURE"): 7449 kind = "STRUCTURE" 7450 if self._match_text_seq("CASCADE", "FAST"): 7451 this = "CASCADE FAST" 7452 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7453 ("ONLINE", "OFFLINE") 7454 ): 7455 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7456 expression = self._parse_into() 7457 7458 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7459 7460 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7461 this = self._prev.text.upper() 7462 if self._match_text_seq("COLUMNS"): 7463 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7464 return None 7465 7466 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7467 kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7468 if self._match_text_seq("STATISTICS"): 7469 return self.expression(exp.AnalyzeDelete, kind=kind) 7470 return None 7471 7472 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7473 if self._match_text_seq("CHAINED", "ROWS"): 7474 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7475 return None 7476 7477 # 
https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7478 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7479 this = self._prev.text.upper() 7480 expression: t.Optional[exp.Expression] = None 7481 expressions = [] 7482 update_options = None 7483 7484 if self._match_text_seq("HISTOGRAM", "ON"): 7485 expressions = self._parse_csv(self._parse_column_reference) 7486 with_expressions = [] 7487 while self._match(TokenType.WITH): 7488 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7489 if self._match_texts(("SYNC", "ASYNC")): 7490 if self._match_text_seq("MODE", advance=False): 7491 with_expressions.append(f"{self._prev.text.upper()} MODE") 7492 self._advance() 7493 else: 7494 buckets = self._parse_number() 7495 if self._match_text_seq("BUCKETS"): 7496 with_expressions.append(f"{buckets} BUCKETS") 7497 if with_expressions: 7498 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7499 7500 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7501 TokenType.UPDATE, advance=False 7502 ): 7503 update_options = self._prev.text.upper() 7504 self._advance() 7505 elif self._match_text_seq("USING", "DATA"): 7506 expression = self.expression(exp.UsingData, this=self._parse_string()) 7507 7508 return self.expression( 7509 exp.AnalyzeHistogram, 7510 this=this, 7511 expressions=expressions, 7512 expression=expression, 7513 update_options=update_options, 7514 ) 7515 7516 def _parse_merge(self) -> exp.Merge: 7517 self._match(TokenType.INTO) 7518 target = self._parse_table() 7519 7520 if target and self._match(TokenType.ALIAS, advance=False): 7521 target.set("alias", self._parse_table_alias()) 7522 7523 self._match(TokenType.USING) 7524 using = self._parse_table() 7525 7526 self._match(TokenType.ON) 7527 on = self._parse_assignment() 7528 7529 return self.expression( 7530 exp.Merge, 7531 this=target, 7532 using=using, 7533 on=on, 7534 whens=self._parse_when_matched(), 7535 
returning=self._parse_returning(), 7536 ) 7537 7538 def _parse_when_matched(self) -> exp.Whens: 7539 whens = [] 7540 7541 while self._match(TokenType.WHEN): 7542 matched = not self._match(TokenType.NOT) 7543 self._match_text_seq("MATCHED") 7544 source = ( 7545 False 7546 if self._match_text_seq("BY", "TARGET") 7547 else self._match_text_seq("BY", "SOURCE") 7548 ) 7549 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7550 7551 self._match(TokenType.THEN) 7552 7553 if self._match(TokenType.INSERT): 7554 this = self._parse_star() 7555 if this: 7556 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7557 else: 7558 then = self.expression( 7559 exp.Insert, 7560 this=exp.var("ROW") 7561 if self._match_text_seq("ROW") 7562 else self._parse_value(values=False), 7563 expression=self._match_text_seq("VALUES") and self._parse_value(), 7564 ) 7565 elif self._match(TokenType.UPDATE): 7566 expressions = self._parse_star() 7567 if expressions: 7568 then = self.expression(exp.Update, expressions=expressions) 7569 else: 7570 then = self.expression( 7571 exp.Update, 7572 expressions=self._match(TokenType.SET) 7573 and self._parse_csv(self._parse_equality), 7574 ) 7575 elif self._match(TokenType.DELETE): 7576 then = self.expression(exp.Var, this=self._prev.text) 7577 else: 7578 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7579 7580 whens.append( 7581 self.expression( 7582 exp.When, 7583 matched=matched, 7584 source=source, 7585 condition=condition, 7586 then=then, 7587 ) 7588 ) 7589 return self.expression(exp.Whens, expressions=whens) 7590 7591 def _parse_show(self) -> t.Optional[exp.Expression]: 7592 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7593 if parser: 7594 return parser(self) 7595 return self._parse_as_command(self._prev) 7596 7597 def _parse_set_item_assignment( 7598 self, kind: t.Optional[str] = None 7599 ) -> t.Optional[exp.Expression]: 7600 index = self._index 7601 7602 if kind in 
("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7603 return self._parse_set_transaction(global_=kind == "GLOBAL") 7604 7605 left = self._parse_primary() or self._parse_column() 7606 assignment_delimiter = self._match_texts(("=", "TO")) 7607 7608 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7609 self._retreat(index) 7610 return None 7611 7612 right = self._parse_statement() or self._parse_id_var() 7613 if isinstance(right, (exp.Column, exp.Identifier)): 7614 right = exp.var(right.name) 7615 7616 this = self.expression(exp.EQ, this=left, expression=right) 7617 return self.expression(exp.SetItem, this=this, kind=kind) 7618 7619 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7620 self._match_text_seq("TRANSACTION") 7621 characteristics = self._parse_csv( 7622 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7623 ) 7624 return self.expression( 7625 exp.SetItem, 7626 expressions=characteristics, 7627 kind="TRANSACTION", 7628 **{"global": global_}, # type: ignore 7629 ) 7630 7631 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7632 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7633 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7634 7635 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7636 index = self._index 7637 set_ = self.expression( 7638 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7639 ) 7640 7641 if self._curr: 7642 self._retreat(index) 7643 return self._parse_as_command(self._prev) 7644 7645 return set_ 7646 7647 def _parse_var_from_options( 7648 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7649 ) -> t.Optional[exp.Var]: 7650 start = self._curr 7651 if not start: 7652 return None 7653 7654 option = start.text.upper() 7655 continuations = options.get(option) 7656 7657 index = self._index 7658 self._advance() 7659 for 
keywords in continuations or []: 7660 if isinstance(keywords, str): 7661 keywords = (keywords,) 7662 7663 if self._match_text_seq(*keywords): 7664 option = f"{option} {' '.join(keywords)}" 7665 break 7666 else: 7667 if continuations or continuations is None: 7668 if raise_unmatched: 7669 self.raise_error(f"Unknown option {option}") 7670 7671 self._retreat(index) 7672 return None 7673 7674 return exp.var(option) 7675 7676 def _parse_as_command(self, start: Token) -> exp.Command: 7677 while self._curr: 7678 self._advance() 7679 text = self._find_sql(start, self._prev) 7680 size = len(start.text) 7681 self._warn_unsupported() 7682 return exp.Command(this=text[:size], expression=text[size:]) 7683 7684 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7685 settings = [] 7686 7687 self._match_l_paren() 7688 kind = self._parse_id_var() 7689 7690 if self._match(TokenType.L_PAREN): 7691 while True: 7692 key = self._parse_id_var() 7693 value = self._parse_primary() 7694 if not key and value is None: 7695 break 7696 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7697 self._match(TokenType.R_PAREN) 7698 7699 self._match_r_paren() 7700 7701 return self.expression( 7702 exp.DictProperty, 7703 this=this, 7704 kind=kind.this if kind else None, 7705 settings=settings, 7706 ) 7707 7708 def _parse_dict_range(self, this: str) -> exp.DictRange: 7709 self._match_l_paren() 7710 has_min = self._match_text_seq("MIN") 7711 if has_min: 7712 min = self._parse_var() or self._parse_primary() 7713 self._match_text_seq("MAX") 7714 max = self._parse_var() or self._parse_primary() 7715 else: 7716 max = self._parse_var() or self._parse_primary() 7717 min = exp.Literal.number(0) 7718 self._match_r_paren() 7719 return self.expression(exp.DictRange, this=this, min=min, max=max) 7720 7721 def _parse_comprehension( 7722 self, this: t.Optional[exp.Expression] 7723 ) -> t.Optional[exp.Comprehension]: 7724 index = self._index 7725 expression = self._parse_column() 
7726 if not self._match(TokenType.IN): 7727 self._retreat(index - 1) 7728 return None 7729 iterator = self._parse_column() 7730 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7731 return self.expression( 7732 exp.Comprehension, 7733 this=this, 7734 expression=expression, 7735 iterator=iterator, 7736 condition=condition, 7737 ) 7738 7739 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7740 if self._match(TokenType.HEREDOC_STRING): 7741 return self.expression(exp.Heredoc, this=self._prev.text) 7742 7743 if not self._match_text_seq("$"): 7744 return None 7745 7746 tags = ["$"] 7747 tag_text = None 7748 7749 if self._is_connected(): 7750 self._advance() 7751 tags.append(self._prev.text.upper()) 7752 else: 7753 self.raise_error("No closing $ found") 7754 7755 if tags[-1] != "$": 7756 if self._is_connected() and self._match_text_seq("$"): 7757 tag_text = tags[-1] 7758 tags.append("$") 7759 else: 7760 self.raise_error("No closing $ found") 7761 7762 heredoc_start = self._curr 7763 7764 while self._curr: 7765 if self._match_text_seq(*tags, advance=False): 7766 this = self._find_sql(heredoc_start, self._prev) 7767 self._advance(len(tags)) 7768 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7769 7770 self._advance() 7771 7772 self.raise_error(f"No closing {''.join(tags)} found") 7773 return None 7774 7775 def _find_parser( 7776 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7777 ) -> t.Optional[t.Callable]: 7778 if not self._curr: 7779 return None 7780 7781 index = self._index 7782 this = [] 7783 while True: 7784 # The current token might be multiple words 7785 curr = self._curr.text.upper() 7786 key = curr.split(" ") 7787 this.append(curr) 7788 7789 self._advance() 7790 result, trie = in_trie(trie, key) 7791 if result == TrieResult.FAILED: 7792 break 7793 7794 if result == TrieResult.EXISTS: 7795 subparser = parsers[" ".join(this)] 7796 return subparser 7797 7798 self._retreat(index) 7799 return None 7800 7801 def 
_match(self, token_type, advance=True, expression=None): 7802 if not self._curr: 7803 return None 7804 7805 if self._curr.token_type == token_type: 7806 if advance: 7807 self._advance() 7808 self._add_comments(expression) 7809 return True 7810 7811 return None 7812 7813 def _match_set(self, types, advance=True): 7814 if not self._curr: 7815 return None 7816 7817 if self._curr.token_type in types: 7818 if advance: 7819 self._advance() 7820 return True 7821 7822 return None 7823 7824 def _match_pair(self, token_type_a, token_type_b, advance=True): 7825 if not self._curr or not self._next: 7826 return None 7827 7828 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7829 if advance: 7830 self._advance(2) 7831 return True 7832 7833 return None 7834 7835 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7836 if not self._match(TokenType.L_PAREN, expression=expression): 7837 self.raise_error("Expecting (") 7838 7839 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7840 if not self._match(TokenType.R_PAREN, expression=expression): 7841 self.raise_error("Expecting )") 7842 7843 def _match_texts(self, texts, advance=True): 7844 if ( 7845 self._curr 7846 and self._curr.token_type != TokenType.STRING 7847 and self._curr.text.upper() in texts 7848 ): 7849 if advance: 7850 self._advance() 7851 return True 7852 return None 7853 7854 def _match_text_seq(self, *texts, advance=True): 7855 index = self._index 7856 for text in texts: 7857 if ( 7858 self._curr 7859 and self._curr.token_type != TokenType.STRING 7860 and self._curr.text.upper() == text 7861 ): 7862 self._advance() 7863 else: 7864 self._retreat(index) 7865 return None 7866 7867 if not advance: 7868 self._retreat(index) 7869 7870 return True 7871 7872 def _replace_lambda( 7873 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7874 ) -> t.Optional[exp.Expression]: 7875 if not node: 7876 return node 7877 
7878 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7879 7880 for column in node.find_all(exp.Column): 7881 typ = lambda_types.get(column.parts[0].name) 7882 if typ is not None: 7883 dot_or_id = column.to_dot() if column.table else column.this 7884 7885 if typ: 7886 dot_or_id = self.expression( 7887 exp.Cast, 7888 this=dot_or_id, 7889 to=typ, 7890 ) 7891 7892 parent = column.parent 7893 7894 while isinstance(parent, exp.Dot): 7895 if not isinstance(parent.parent, exp.Dot): 7896 parent.replace(dot_or_id) 7897 break 7898 parent = parent.parent 7899 else: 7900 if column is node: 7901 node = dot_or_id 7902 else: 7903 column.replace(dot_or_id) 7904 return node 7905 7906 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7907 start = self._prev 7908 7909 # Not to be confused with TRUNCATE(number, decimals) function call 7910 if self._match(TokenType.L_PAREN): 7911 self._retreat(self._index - 2) 7912 return self._parse_function() 7913 7914 # Clickhouse supports TRUNCATE DATABASE as well 7915 is_database = self._match(TokenType.DATABASE) 7916 7917 self._match(TokenType.TABLE) 7918 7919 exists = self._parse_exists(not_=False) 7920 7921 expressions = self._parse_csv( 7922 lambda: self._parse_table(schema=True, is_db_reference=is_database) 7923 ) 7924 7925 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7926 7927 if self._match_text_seq("RESTART", "IDENTITY"): 7928 identity = "RESTART" 7929 elif self._match_text_seq("CONTINUE", "IDENTITY"): 7930 identity = "CONTINUE" 7931 else: 7932 identity = None 7933 7934 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7935 option = self._prev.text 7936 else: 7937 option = None 7938 7939 partition = self._parse_partition() 7940 7941 # Fallback case 7942 if self._curr: 7943 return self._parse_as_command(start) 7944 7945 return self.expression( 7946 exp.TruncateTable, 7947 expressions=expressions, 7948 is_database=is_database, 7949 
exists=exists, 7950 cluster=cluster, 7951 identity=identity, 7952 option=option, 7953 partition=partition, 7954 ) 7955 7956 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7957 this = self._parse_ordered(self._parse_opclass) 7958 7959 if not self._match(TokenType.WITH): 7960 return this 7961 7962 op = self._parse_var(any_token=True) 7963 7964 return self.expression(exp.WithOperator, this=this, op=op) 7965 7966 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 7967 self._match(TokenType.EQ) 7968 self._match(TokenType.L_PAREN) 7969 7970 opts: t.List[t.Optional[exp.Expression]] = [] 7971 option: exp.Expression | None 7972 while self._curr and not self._match(TokenType.R_PAREN): 7973 if self._match_text_seq("FORMAT_NAME", "="): 7974 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 7975 option = self._parse_format_name() 7976 else: 7977 option = self._parse_property() 7978 7979 if option is None: 7980 self.raise_error("Unable to parse option") 7981 break 7982 7983 opts.append(option) 7984 7985 return opts 7986 7987 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 7988 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 7989 7990 options = [] 7991 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 7992 option = self._parse_var(any_token=True) 7993 prev = self._prev.text.upper() 7994 7995 # Different dialects might separate options and values by white space, "=" and "AS" 7996 self._match(TokenType.EQ) 7997 self._match(TokenType.ALIAS) 7998 7999 param = self.expression(exp.CopyParameter, this=option) 8000 8001 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8002 TokenType.L_PAREN, advance=False 8003 ): 8004 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8005 param.set("expressions", self._parse_wrapped_options()) 8006 elif prev == "FILE_FORMAT": 8007 # T-SQL's external file format case 8008 param.set("expression", self._parse_field()) 8009 
else: 8010 param.set("expression", self._parse_unquoted_field()) 8011 8012 options.append(param) 8013 self._match(sep) 8014 8015 return options 8016 8017 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8018 expr = self.expression(exp.Credentials) 8019 8020 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8021 expr.set("storage", self._parse_field()) 8022 if self._match_text_seq("CREDENTIALS"): 8023 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8024 creds = ( 8025 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8026 ) 8027 expr.set("credentials", creds) 8028 if self._match_text_seq("ENCRYPTION"): 8029 expr.set("encryption", self._parse_wrapped_options()) 8030 if self._match_text_seq("IAM_ROLE"): 8031 expr.set("iam_role", self._parse_field()) 8032 if self._match_text_seq("REGION"): 8033 expr.set("region", self._parse_field()) 8034 8035 return expr 8036 8037 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8038 return self._parse_field() 8039 8040 def _parse_copy(self) -> exp.Copy | exp.Command: 8041 start = self._prev 8042 8043 self._match(TokenType.INTO) 8044 8045 this = ( 8046 self._parse_select(nested=True, parse_subquery_alias=False) 8047 if self._match(TokenType.L_PAREN, advance=False) 8048 else self._parse_table(schema=True) 8049 ) 8050 8051 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8052 8053 files = self._parse_csv(self._parse_file_location) 8054 credentials = self._parse_credentials() 8055 8056 self._match_text_seq("WITH") 8057 8058 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8059 8060 # Fallback case 8061 if self._curr: 8062 return self._parse_as_command(start) 8063 8064 return self.expression( 8065 exp.Copy, 8066 this=this, 8067 kind=kind, 8068 credentials=credentials, 8069 files=files, 8070 params=params, 8071 ) 8072 8073 def _parse_normalize(self) -> exp.Normalize: 8074 return self.expression( 8075 
exp.Normalize, 8076 this=self._parse_bitwise(), 8077 form=self._match(TokenType.COMMA) and self._parse_var(), 8078 ) 8079 8080 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8081 args = self._parse_csv(lambda: self._parse_lambda()) 8082 8083 this = seq_get(args, 0) 8084 decimals = seq_get(args, 1) 8085 8086 return expr_type( 8087 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8088 ) 8089 8090 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8091 if self._match_text_seq("COLUMNS", "(", advance=False): 8092 this = self._parse_function() 8093 if isinstance(this, exp.Columns): 8094 this.set("unpack", True) 8095 return this 8096 8097 return self.expression( 8098 exp.Star, 8099 **{ # type: ignore 8100 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8101 "replace": self._parse_star_op("REPLACE"), 8102 "rename": self._parse_star_op("RENAME"), 8103 }, 8104 ) 8105 8106 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8107 privilege_parts = [] 8108 8109 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8110 # (end of privilege list) or L_PAREN (start of column list) are met 8111 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8112 privilege_parts.append(self._curr.text.upper()) 8113 self._advance() 8114 8115 this = exp.var(" ".join(privilege_parts)) 8116 expressions = ( 8117 self._parse_wrapped_csv(self._parse_column) 8118 if self._match(TokenType.L_PAREN, advance=False) 8119 else None 8120 ) 8121 8122 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8123 8124 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8125 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8126 principal = self._parse_id_var() 8127 8128 if not principal: 8129 return None 8130 8131 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8132 8133 def 
_parse_grant(self) -> exp.Grant | exp.Command: 8134 start = self._prev 8135 8136 privileges = self._parse_csv(self._parse_grant_privilege) 8137 8138 self._match(TokenType.ON) 8139 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8140 8141 # Attempt to parse the securable e.g. MySQL allows names 8142 # such as "foo.*", "*.*" which are not easily parseable yet 8143 securable = self._try_parse(self._parse_table_parts) 8144 8145 if not securable or not self._match_text_seq("TO"): 8146 return self._parse_as_command(start) 8147 8148 principals = self._parse_csv(self._parse_grant_principal) 8149 8150 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8151 8152 if self._curr: 8153 return self._parse_as_command(start) 8154 8155 return self.expression( 8156 exp.Grant, 8157 privileges=privileges, 8158 kind=kind, 8159 securable=securable, 8160 principals=principals, 8161 grant_option=grant_option, 8162 ) 8163 8164 def _parse_overlay(self) -> exp.Overlay: 8165 return self.expression( 8166 exp.Overlay, 8167 **{ # type: ignore 8168 "this": self._parse_bitwise(), 8169 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8170 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8171 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8172 }, 8173 ) 8174 8175 def _parse_format_name(self) -> exp.Property: 8176 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8177 # for FILE_FORMAT = <format_name> 8178 return self.expression( 8179 exp.Property, 8180 this=exp.var("FORMAT_NAME"), 8181 value=self._parse_string() or self._parse_table_parts(), 8182 )
def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """
    Build a map expression from a flat `key, value, key, value, ...` argument list.

    Args:
        args: The flat list of alternating keys and values, or a single star expression.

    Returns:
        An `exp.StarMap` for a lone star argument. Otherwise an `exp.VarMap` whose keys are
        the even-indexed and whose values are the odd-indexed arguments.

    Raises:
        IndexError: If a key is not followed by a value.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys, values = [], []
    for key_index in range(0, len(args), 2):
        # Look the value up eagerly so that a dangling key still fails with an IndexError
        key, value = args[key_index], args[key_index + 1]
        keys.append(key)
        values.append(value)

    key_array = exp.array(*keys, copy=False)
    value_array = exp.array(*values, copy=False)
    return exp.VarMap(keys=key_array, values=value_array)
def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    """
    Create a parser callback for a binary range operator.

    Args:
        expr_type: The expression class to build for the operator.
        reverse_args: Whether the two operands should swap places in the built expression.

    Returns:
        A function taking the parser and the already parsed left operand. It parses the other
        operand with `_parse_bitwise`, builds `expr_type` and passes it through `_parse_escape`.
    """

    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        operand = self._parse_bitwise()
        left, right = (operand, this) if reverse_args else (this, operand)
        node = self.expression(expr_type, this=left, expression=right)
        return self._parse_escape(node)

    return _parse_binary_range
def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    """
    Build the expression for a LOG call with one or two arguments.

    Args:
        args: The call's arguments.
        dialect: The dialect, consulted for the argument order and the single-argument default.

    Returns:
        For two arguments, an `exp.Log` whose operands are swapped unless the dialect puts the
        base first. For one argument, an `exp.Ln` or `exp.Log` depending on the dialect's
        parser class.
    """
    # Default argument order is base, expression
    first = seq_get(args, 0)
    second = seq_get(args, 1)

    if not second:
        log_type = exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log
        return log_type(this=first)

    if dialect.LOG_BASE_FIRST:
        return exp.Log(this=first, expression=second)

    return exp.Log(this=second, expression=first)
def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    """
    Create a builder for JSON extraction functions that take a path as second argument.

    Args:
        expr_type: The expression class the builder instantiates.

    Returns:
        A function taking the call's arguments and the dialect. The second argument is
        converted with the dialect's `to_json_path`. Arguments beyond the second are kept
        under "expressions", but only when `expr_type` is `exp.JSONExtract`.
    """

    def _builder(args: t.List, dialect: Dialect) -> E:
        target = seq_get(args, 0)
        path = dialect.to_json_path(seq_get(args, 1))
        node = expr_type(this=target, expression=path)

        extra_paths = args[2:]
        if extra_paths and expr_type is exp.JSONExtract:
            node.set("expressions", extra_paths)

        return node

    return _builder
def build_mod(args: t.List) -> exp.Mod:
    """
    Build an `exp.Mod` from the first two arguments of a MOD call.

    Args:
        args: The call's arguments.

    Returns:
        The modulo expression, with each binary operand wrapped in parentheses.
    """

    def _parenthesize(operand):
        # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
        return exp.Paren(this=operand) if isinstance(operand, exp.Binary) else operand

    dividend = _parenthesize(seq_get(args, 0))
    divisor = _parenthesize(seq_get(args, 1))
    return exp.Mod(this=dividend, expression=divisor)
def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    """
    Build an array-like expression from the arguments of its constructor.

    Args:
        exp_class: The expression class to instantiate.
        args: The constructor's arguments, stored under "expressions".
        bracket_kind: The token type of the constructor's opening bracket.
        dialect: The dialect, consulted for `HAS_DISTINCT_ARRAY_CONSTRUCTORS`.

    Returns:
        The built expression. For `exp.Array` in dialects with distinct array constructors,
        "bracket_notation" records whether the opening bracket was an L_BRACKET.
    """
    node = exp_class(expressions=args)

    is_plain_array = exp_class == exp.Array
    if not (is_plain_array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS):
        return node

    uses_brackets = bracket_kind == TokenType.L_BRACKET
    node.set("bracket_notation", uses_brackets)
    return node
def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    """
    Build an `exp.ConvertTimezone` from the arguments of a CONVERT_TIMEZONE call.

    Args:
        args: The call's arguments.
        default_source_tz: The source time zone to assume for the two-argument form.

    Returns:
        For exactly two arguments, an expression with the arguments as target time zone and
        timestamp, and the default (if any) as a string literal source time zone. Any other
        argument count is delegated to `exp.ConvertTimezone.from_arg_list`.
    """
    if len(args) != 2:
        return exp.ConvertTimezone.from_arg_list(args)

    source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
    target_tz = seq_get(args, 0)
    timestamp = seq_get(args, 1)
    return exp.ConvertTimezone(source_tz=source_tz, target_tz=target_tz, timestamp=timestamp)
176class Parser(metaclass=_Parser): 177 """ 178 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 179 180 Args: 181 error_level: The desired error level. 182 Default: ErrorLevel.IMMEDIATE 183 error_message_context: The amount of context to capture from a query string when displaying 184 the error message (in number of characters). 185 Default: 100 186 max_errors: Maximum number of error messages to include in a raised ParseError. 187 This is only relevant if error_level is ErrorLevel.RAISE. 188 Default: 3 189 """ 190 191 FUNCTIONS: t.Dict[str, t.Callable] = { 192 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 193 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 194 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 195 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 196 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 197 ), 198 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 199 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 200 ), 201 "CHAR": lambda args: exp.Chr(expressions=args), 202 "CHR": lambda args: exp.Chr(expressions=args), 203 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 204 "CONCAT": lambda args, dialect: exp.Concat( 205 expressions=args, 206 safe=not dialect.STRICT_STRING_CONCAT, 207 coalesce=dialect.CONCAT_COALESCE, 208 ), 209 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 210 expressions=args, 211 safe=not dialect.STRICT_STRING_CONCAT, 212 coalesce=dialect.CONCAT_COALESCE, 213 ), 214 "CONVERT_TIMEZONE": build_convert_timezone, 215 "DATE_TO_DATE_STR": lambda args: exp.Cast( 216 this=seq_get(args, 0), 217 to=exp.DataType(this=exp.DataType.Type.TEXT), 218 ), 219 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 220 start=seq_get(args, 0), 221 end=seq_get(args, 1), 222 step=seq_get(args, 2) or 
exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")), 223 ), 224 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 225 "HEX": build_hex, 226 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 227 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 228 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 229 "LIKE": build_like, 230 "LOG": build_logarithm, 231 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 232 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 233 "LOWER": build_lower, 234 "LPAD": lambda args: build_pad(args), 235 "LEFTPAD": lambda args: build_pad(args), 236 "LTRIM": lambda args: build_trim(args), 237 "MOD": build_mod, 238 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 239 "RPAD": lambda args: build_pad(args, is_left=False), 240 "RTRIM": lambda args: build_trim(args, is_left=False), 241 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 242 if len(args) != 2 243 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 244 "STRPOS": exp.StrPosition.from_arg_list, 245 "CHARINDEX": lambda args: build_locate_strposition(args), 246 "INSTR": exp.StrPosition.from_arg_list, 247 "LOCATE": lambda args: build_locate_strposition(args), 248 "TIME_TO_TIME_STR": lambda args: exp.Cast( 249 this=seq_get(args, 0), 250 to=exp.DataType(this=exp.DataType.Type.TEXT), 251 ), 252 "TO_HEX": build_hex, 253 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 254 this=exp.Cast( 255 this=seq_get(args, 0), 256 to=exp.DataType(this=exp.DataType.Type.TEXT), 257 ), 258 start=exp.Literal.number(1), 259 length=exp.Literal.number(10), 260 ), 261 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 262 "UPPER": build_upper, 263 "VAR_MAP": build_var_map, 264 } 265 266 NO_PAREN_FUNCTIONS = { 267 TokenType.CURRENT_DATE: 
exp.CurrentDate, 268 TokenType.CURRENT_DATETIME: exp.CurrentDate, 269 TokenType.CURRENT_TIME: exp.CurrentTime, 270 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 271 TokenType.CURRENT_USER: exp.CurrentUser, 272 } 273 274 STRUCT_TYPE_TOKENS = { 275 TokenType.NESTED, 276 TokenType.OBJECT, 277 TokenType.STRUCT, 278 TokenType.UNION, 279 } 280 281 NESTED_TYPE_TOKENS = { 282 TokenType.ARRAY, 283 TokenType.LIST, 284 TokenType.LOWCARDINALITY, 285 TokenType.MAP, 286 TokenType.NULLABLE, 287 TokenType.RANGE, 288 *STRUCT_TYPE_TOKENS, 289 } 290 291 ENUM_TYPE_TOKENS = { 292 TokenType.DYNAMIC, 293 TokenType.ENUM, 294 TokenType.ENUM8, 295 TokenType.ENUM16, 296 } 297 298 AGGREGATE_TYPE_TOKENS = { 299 TokenType.AGGREGATEFUNCTION, 300 TokenType.SIMPLEAGGREGATEFUNCTION, 301 } 302 303 TYPE_TOKENS = { 304 TokenType.BIT, 305 TokenType.BOOLEAN, 306 TokenType.TINYINT, 307 TokenType.UTINYINT, 308 TokenType.SMALLINT, 309 TokenType.USMALLINT, 310 TokenType.INT, 311 TokenType.UINT, 312 TokenType.BIGINT, 313 TokenType.UBIGINT, 314 TokenType.INT128, 315 TokenType.UINT128, 316 TokenType.INT256, 317 TokenType.UINT256, 318 TokenType.MEDIUMINT, 319 TokenType.UMEDIUMINT, 320 TokenType.FIXEDSTRING, 321 TokenType.FLOAT, 322 TokenType.DOUBLE, 323 TokenType.UDOUBLE, 324 TokenType.CHAR, 325 TokenType.NCHAR, 326 TokenType.VARCHAR, 327 TokenType.NVARCHAR, 328 TokenType.BPCHAR, 329 TokenType.TEXT, 330 TokenType.MEDIUMTEXT, 331 TokenType.LONGTEXT, 332 TokenType.BLOB, 333 TokenType.MEDIUMBLOB, 334 TokenType.LONGBLOB, 335 TokenType.BINARY, 336 TokenType.VARBINARY, 337 TokenType.JSON, 338 TokenType.JSONB, 339 TokenType.INTERVAL, 340 TokenType.TINYBLOB, 341 TokenType.TINYTEXT, 342 TokenType.TIME, 343 TokenType.TIMETZ, 344 TokenType.TIMESTAMP, 345 TokenType.TIMESTAMP_S, 346 TokenType.TIMESTAMP_MS, 347 TokenType.TIMESTAMP_NS, 348 TokenType.TIMESTAMPTZ, 349 TokenType.TIMESTAMPLTZ, 350 TokenType.TIMESTAMPNTZ, 351 TokenType.DATETIME, 352 TokenType.DATETIME2, 353 TokenType.DATETIME64, 354 TokenType.SMALLDATETIME, 
355 TokenType.DATE, 356 TokenType.DATE32, 357 TokenType.INT4RANGE, 358 TokenType.INT4MULTIRANGE, 359 TokenType.INT8RANGE, 360 TokenType.INT8MULTIRANGE, 361 TokenType.NUMRANGE, 362 TokenType.NUMMULTIRANGE, 363 TokenType.TSRANGE, 364 TokenType.TSMULTIRANGE, 365 TokenType.TSTZRANGE, 366 TokenType.TSTZMULTIRANGE, 367 TokenType.DATERANGE, 368 TokenType.DATEMULTIRANGE, 369 TokenType.DECIMAL, 370 TokenType.DECIMAL32, 371 TokenType.DECIMAL64, 372 TokenType.DECIMAL128, 373 TokenType.DECIMAL256, 374 TokenType.UDECIMAL, 375 TokenType.BIGDECIMAL, 376 TokenType.UUID, 377 TokenType.GEOGRAPHY, 378 TokenType.GEOMETRY, 379 TokenType.POINT, 380 TokenType.RING, 381 TokenType.LINESTRING, 382 TokenType.MULTILINESTRING, 383 TokenType.POLYGON, 384 TokenType.MULTIPOLYGON, 385 TokenType.HLLSKETCH, 386 TokenType.HSTORE, 387 TokenType.PSEUDO_TYPE, 388 TokenType.SUPER, 389 TokenType.SERIAL, 390 TokenType.SMALLSERIAL, 391 TokenType.BIGSERIAL, 392 TokenType.XML, 393 TokenType.YEAR, 394 TokenType.USERDEFINED, 395 TokenType.MONEY, 396 TokenType.SMALLMONEY, 397 TokenType.ROWVERSION, 398 TokenType.IMAGE, 399 TokenType.VARIANT, 400 TokenType.VECTOR, 401 TokenType.OBJECT, 402 TokenType.OBJECT_IDENTIFIER, 403 TokenType.INET, 404 TokenType.IPADDRESS, 405 TokenType.IPPREFIX, 406 TokenType.IPV4, 407 TokenType.IPV6, 408 TokenType.UNKNOWN, 409 TokenType.NULL, 410 TokenType.NAME, 411 TokenType.TDIGEST, 412 TokenType.DYNAMIC, 413 *ENUM_TYPE_TOKENS, 414 *NESTED_TYPE_TOKENS, 415 *AGGREGATE_TYPE_TOKENS, 416 } 417 418 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 419 TokenType.BIGINT: TokenType.UBIGINT, 420 TokenType.INT: TokenType.UINT, 421 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 422 TokenType.SMALLINT: TokenType.USMALLINT, 423 TokenType.TINYINT: TokenType.UTINYINT, 424 TokenType.DECIMAL: TokenType.UDECIMAL, 425 TokenType.DOUBLE: TokenType.UDOUBLE, 426 } 427 428 SUBQUERY_PREDICATES = { 429 TokenType.ANY: exp.Any, 430 TokenType.ALL: exp.All, 431 TokenType.EXISTS: exp.Exists, 432 TokenType.SOME: exp.Any, 433 } 434 435 
RESERVED_TOKENS = { 436 *Tokenizer.SINGLE_TOKENS.values(), 437 TokenType.SELECT, 438 } - {TokenType.IDENTIFIER} 439 440 DB_CREATABLES = { 441 TokenType.DATABASE, 442 TokenType.DICTIONARY, 443 TokenType.FILE_FORMAT, 444 TokenType.MODEL, 445 TokenType.NAMESPACE, 446 TokenType.SCHEMA, 447 TokenType.SEQUENCE, 448 TokenType.SINK, 449 TokenType.SOURCE, 450 TokenType.STAGE, 451 TokenType.STORAGE_INTEGRATION, 452 TokenType.STREAMLIT, 453 TokenType.TABLE, 454 TokenType.TAG, 455 TokenType.VIEW, 456 TokenType.WAREHOUSE, 457 } 458 459 CREATABLES = { 460 TokenType.COLUMN, 461 TokenType.CONSTRAINT, 462 TokenType.FOREIGN_KEY, 463 TokenType.FUNCTION, 464 TokenType.INDEX, 465 TokenType.PROCEDURE, 466 *DB_CREATABLES, 467 } 468 469 ALTERABLES = { 470 TokenType.INDEX, 471 TokenType.TABLE, 472 TokenType.VIEW, 473 } 474 475 # Tokens that can represent identifiers 476 ID_VAR_TOKENS = { 477 TokenType.ALL, 478 TokenType.ATTACH, 479 TokenType.VAR, 480 TokenType.ANTI, 481 TokenType.APPLY, 482 TokenType.ASC, 483 TokenType.ASOF, 484 TokenType.AUTO_INCREMENT, 485 TokenType.BEGIN, 486 TokenType.BPCHAR, 487 TokenType.CACHE, 488 TokenType.CASE, 489 TokenType.COLLATE, 490 TokenType.COMMAND, 491 TokenType.COMMENT, 492 TokenType.COMMIT, 493 TokenType.CONSTRAINT, 494 TokenType.COPY, 495 TokenType.CUBE, 496 TokenType.CURRENT_SCHEMA, 497 TokenType.DEFAULT, 498 TokenType.DELETE, 499 TokenType.DESC, 500 TokenType.DESCRIBE, 501 TokenType.DETACH, 502 TokenType.DICTIONARY, 503 TokenType.DIV, 504 TokenType.END, 505 TokenType.EXECUTE, 506 TokenType.EXPORT, 507 TokenType.ESCAPE, 508 TokenType.FALSE, 509 TokenType.FIRST, 510 TokenType.FILTER, 511 TokenType.FINAL, 512 TokenType.FORMAT, 513 TokenType.FULL, 514 TokenType.IDENTIFIER, 515 TokenType.IS, 516 TokenType.ISNULL, 517 TokenType.INTERVAL, 518 TokenType.KEEP, 519 TokenType.KILL, 520 TokenType.LEFT, 521 TokenType.LIMIT, 522 TokenType.LOAD, 523 TokenType.MERGE, 524 TokenType.NATURAL, 525 TokenType.NEXT, 526 TokenType.OFFSET, 527 TokenType.OPERATOR, 528 
TokenType.ORDINALITY, 529 TokenType.OVERLAPS, 530 TokenType.OVERWRITE, 531 TokenType.PARTITION, 532 TokenType.PERCENT, 533 TokenType.PIVOT, 534 TokenType.PRAGMA, 535 TokenType.PUT, 536 TokenType.RANGE, 537 TokenType.RECURSIVE, 538 TokenType.REFERENCES, 539 TokenType.REFRESH, 540 TokenType.RENAME, 541 TokenType.REPLACE, 542 TokenType.RIGHT, 543 TokenType.ROLLUP, 544 TokenType.ROW, 545 TokenType.ROWS, 546 TokenType.SEMI, 547 TokenType.SET, 548 TokenType.SETTINGS, 549 TokenType.SHOW, 550 TokenType.TEMPORARY, 551 TokenType.TOP, 552 TokenType.TRUE, 553 TokenType.TRUNCATE, 554 TokenType.UNIQUE, 555 TokenType.UNNEST, 556 TokenType.UNPIVOT, 557 TokenType.UPDATE, 558 TokenType.USE, 559 TokenType.VOLATILE, 560 TokenType.WINDOW, 561 *CREATABLES, 562 *SUBQUERY_PREDICATES, 563 *TYPE_TOKENS, 564 *NO_PAREN_FUNCTIONS, 565 } 566 ID_VAR_TOKENS.remove(TokenType.UNION) 567 568 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 569 TokenType.ANTI, 570 TokenType.APPLY, 571 TokenType.ASOF, 572 TokenType.FULL, 573 TokenType.LEFT, 574 TokenType.LOCK, 575 TokenType.NATURAL, 576 TokenType.RIGHT, 577 TokenType.SEMI, 578 TokenType.WINDOW, 579 } 580 581 ALIAS_TOKENS = ID_VAR_TOKENS 582 583 ARRAY_CONSTRUCTORS = { 584 "ARRAY": exp.Array, 585 "LIST": exp.List, 586 } 587 588 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 589 590 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 591 592 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 593 594 FUNC_TOKENS = { 595 TokenType.COLLATE, 596 TokenType.COMMAND, 597 TokenType.CURRENT_DATE, 598 TokenType.CURRENT_DATETIME, 599 TokenType.CURRENT_SCHEMA, 600 TokenType.CURRENT_TIMESTAMP, 601 TokenType.CURRENT_TIME, 602 TokenType.CURRENT_USER, 603 TokenType.FILTER, 604 TokenType.FIRST, 605 TokenType.FORMAT, 606 TokenType.GLOB, 607 TokenType.IDENTIFIER, 608 TokenType.INDEX, 609 TokenType.ISNULL, 610 TokenType.ILIKE, 611 TokenType.INSERT, 612 TokenType.LIKE, 613 TokenType.MERGE, 614 TokenType.NEXT, 615 TokenType.OFFSET, 616 TokenType.PRIMARY_KEY, 617 
TokenType.RANGE, 618 TokenType.REPLACE, 619 TokenType.RLIKE, 620 TokenType.ROW, 621 TokenType.UNNEST, 622 TokenType.VAR, 623 TokenType.LEFT, 624 TokenType.RIGHT, 625 TokenType.SEQUENCE, 626 TokenType.DATE, 627 TokenType.DATETIME, 628 TokenType.TABLE, 629 TokenType.TIMESTAMP, 630 TokenType.TIMESTAMPTZ, 631 TokenType.TRUNCATE, 632 TokenType.WINDOW, 633 TokenType.XOR, 634 *TYPE_TOKENS, 635 *SUBQUERY_PREDICATES, 636 } 637 638 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 639 TokenType.AND: exp.And, 640 } 641 642 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 643 TokenType.COLON_EQ: exp.PropertyEQ, 644 } 645 646 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 647 TokenType.OR: exp.Or, 648 } 649 650 EQUALITY = { 651 TokenType.EQ: exp.EQ, 652 TokenType.NEQ: exp.NEQ, 653 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 654 } 655 656 COMPARISON = { 657 TokenType.GT: exp.GT, 658 TokenType.GTE: exp.GTE, 659 TokenType.LT: exp.LT, 660 TokenType.LTE: exp.LTE, 661 } 662 663 BITWISE = { 664 TokenType.AMP: exp.BitwiseAnd, 665 TokenType.CARET: exp.BitwiseXor, 666 TokenType.PIPE: exp.BitwiseOr, 667 } 668 669 TERM = { 670 TokenType.DASH: exp.Sub, 671 TokenType.PLUS: exp.Add, 672 TokenType.MOD: exp.Mod, 673 TokenType.COLLATE: exp.Collate, 674 } 675 676 FACTOR = { 677 TokenType.DIV: exp.IntDiv, 678 TokenType.LR_ARROW: exp.Distance, 679 TokenType.SLASH: exp.Div, 680 TokenType.STAR: exp.Mul, 681 } 682 683 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 684 685 TIMES = { 686 TokenType.TIME, 687 TokenType.TIMETZ, 688 } 689 690 TIMESTAMPS = { 691 TokenType.TIMESTAMP, 692 TokenType.TIMESTAMPNTZ, 693 TokenType.TIMESTAMPTZ, 694 TokenType.TIMESTAMPLTZ, 695 *TIMES, 696 } 697 698 SET_OPERATIONS = { 699 TokenType.UNION, 700 TokenType.INTERSECT, 701 TokenType.EXCEPT, 702 } 703 704 JOIN_METHODS = { 705 TokenType.ASOF, 706 TokenType.NATURAL, 707 TokenType.POSITIONAL, 708 } 709 710 JOIN_SIDES = { 711 TokenType.LEFT, 712 TokenType.RIGHT, 713 TokenType.FULL, 714 } 715 
# Token types grouped as join kinds (the code that consumes this set is not visible here).
JOIN_KINDS = {
    TokenType.ANTI,
    TokenType.CROSS,
    TokenType.INNER,
    TokenType.OUTER,
    TokenType.SEMI,
    TokenType.STRAIGHT_JOIN,
}

# Join hint keywords; empty in this class.
JOIN_HINTS: t.Set[str] = set()

# Maps an arrow token to a builder taking (self, expressions):
# ARROW builds an exp.Lambda, FARROW builds an exp.Kwarg named after the first expression.
LAMBDAS = {
    TokenType.ARROW: lambda self, expressions: self.expression(
        exp.Lambda,
        this=self._replace_lambda(
            self._parse_assignment(),
            expressions,
        ),
        expressions=expressions,
    ),
    TokenType.FARROW: lambda self, expressions: self.expression(
        exp.Kwarg,
        this=exp.var(expressions[0].name),
        expression=self._parse_assignment(),
    ),
}

# Maps an operator token to a builder taking (self, this, <right operand>).
# DOT maps to None, i.e. it has no builder in this table.
COLUMN_OPERATORS = {
    TokenType.DOT: None,
    TokenType.DOTCOLON: lambda self, this, to: self.expression(
        exp.JSONCast,
        this=this,
        to=to,
    ),
    # `::` builds a Cast or a TryCast depending on the STRICT_CAST flag.
    TokenType.DCOLON: lambda self, this, to: self.expression(
        exp.Cast if self.STRICT_CAST else exp.TryCast,
        this=this,
        to=to,
    ),
    TokenType.ARROW: lambda self, this, path: self.expression(
        exp.JSONExtract,
        this=this,
        expression=self.dialect.to_json_path(path),
        only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
    ),
    TokenType.DARROW: lambda self, this, path: self.expression(
        exp.JSONExtractScalar,
        this=this,
        expression=self.dialect.to_json_path(path),
        only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
    ),
    TokenType.HASH_ARROW: lambda self, this, path: self.expression(
        exp.JSONBExtract,
        this=this,
        expression=path,
    ),
    TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
        exp.JSONBExtractScalar,
        this=this,
        expression=path,
    ),
    TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
        exp.JSONBContains,
        this=this,
        expression=key,
    ),
}

# Maps a target expression type (or the string key "JOIN_TYPE") to the zero-argument
# parse routine that produces it. `parse_into` looks parsers up in this table.
EXPRESSION_PARSERS = {
    exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
    exp.Column: lambda self: self._parse_column(),
    exp.Condition: lambda self: self._parse_assignment(),
    exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
    exp.Expression: lambda self: self._parse_expression(),
    exp.From: lambda self: self._parse_from(joins=True),
    exp.Group: lambda self: self._parse_group(),
    exp.Having: lambda self: self._parse_having(),
    exp.Hint: lambda self: self._parse_hint_body(),
    exp.Identifier: lambda self: self._parse_id_var(),
    exp.Join: lambda self: self._parse_join(),
    exp.Lambda: lambda self: self._parse_lambda(),
    exp.Lateral: lambda self: self._parse_lateral(),
    exp.Limit: lambda self: self._parse_limit(),
    exp.Offset: lambda self: self._parse_offset(),
    exp.Order: lambda self: self._parse_order(),
    exp.Ordered: lambda self: self._parse_ordered(),
    exp.Properties: lambda self: self._parse_properties(),
    exp.Qualify: lambda self: self._parse_qualify(),
    exp.Returning: lambda self: self._parse_returning(),
    exp.Select: lambda self: self._parse_select(),
    exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
    exp.Table: lambda self: self._parse_table_parts(),
    exp.TableAlias: lambda self: self._parse_table_alias(),
    exp.Tuple: lambda self: self._parse_value(values=False),
    exp.Whens: lambda self: self._parse_when_matched(),
    exp.Where: lambda self: self._parse_where(),
    exp.Window: lambda self: self._parse_named_window(),
    exp.With: lambda self: self._parse_with(),
    "JOIN_TYPE": lambda self: self._parse_join_parts(),
}

# Maps a statement-leading token type to the routine that parses that statement.
STATEMENT_PARSERS = {
    TokenType.ALTER: lambda self: self._parse_alter(),
    TokenType.ANALYZE: lambda self: self._parse_analyze(),
    TokenType.BEGIN: lambda self: self._parse_transaction(),
    TokenType.CACHE: lambda self: self._parse_cache(),
    TokenType.COMMENT: lambda self: self._parse_comment(),
    TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
    TokenType.COPY: lambda self: self._parse_copy(),
    TokenType.CREATE: lambda self: self._parse_create(),
    TokenType.DELETE: lambda self: self._parse_delete(),
    TokenType.DESC: lambda self: self._parse_describe(),
    TokenType.DESCRIBE: lambda self: self._parse_describe(),
    TokenType.DROP: lambda self: self._parse_drop(),
    TokenType.GRANT: lambda self: self._parse_grant(),
    TokenType.INSERT: lambda self: self._parse_insert(),
    TokenType.KILL: lambda self: self._parse_kill(),
    TokenType.LOAD: lambda self: self._parse_load(),
    TokenType.MERGE: lambda self: self._parse_merge(),
    TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
    TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
    TokenType.REFRESH: lambda self: self._parse_refresh(),
    TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
    TokenType.SET: lambda self: self._parse_set(),
    TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
    TokenType.UNCACHE: lambda self: self._parse_uncache(),
    TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True),
    TokenType.UPDATE: lambda self: self._parse_update(),
    TokenType.USE: lambda self: self._parse_use(),
    TokenType.SEMICOLON: lambda self: exp.Semicolon(),
}

# Maps a prefix-operator token to the routine that builds the unary expression.
UNARY_PARSERS = {
    TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
    TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
    TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
    TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
    TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
}

# Maps a string-like token type to a builder taking (self, token); the node is built
# from `token.text`.
STRING_PARSERS = {
    TokenType.HEREDOC_STRING: lambda self, token: self.expression(
        exp.RawString, this=token.text
    ),
    TokenType.NATIONAL_STRING: lambda self, token: self.expression(
        exp.National, this=token.text
    ),
    TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
    TokenType.STRING: lambda self, token: self.expression(
        exp.Literal, this=token.text, is_string=True
    ),
    # An optional trailing `UESCAPE <string>` is consumed and stored under `escape`.
    TokenType.UNICODE_STRING: lambda self, token: self.expression(
        exp.UnicodeString,
        this=token.text,
        escape=self._match_text_seq("UESCAPE") and self._parse_string(),
    ),
}

# Maps a numeric-like token type to a builder taking (self, token).
NUMERIC_PARSERS = {
    TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
    TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
    # `is_integer` is True when the dialect flag is set, otherwise None (never False).
    TokenType.HEX_STRING: lambda self, token: self.expression(
        exp.HexString,
        this=token.text,
        is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None,
    ),
    TokenType.NUMBER: lambda self, token: self.expression(
        exp.Literal, this=token.text, is_string=False
    ),
}

# Union of the string and numeric tables plus the remaining primary tokens.
# Every builder takes (self, token); several ignore the token.
PRIMARY_PARSERS = {
    **STRING_PARSERS,
    **NUMERIC_PARSERS,
    TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
    TokenType.NULL: lambda self, _: self.expression(exp.Null),
    TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
    TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
    TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    TokenType.STAR: lambda self, _: self._parse_star_ops(),
}

# Maps a placeholder-introducing token to its parser. The COLON entry yields a named
# Placeholder only when an ID_VAR_TOKENS token is matched next, otherwise None.
PLACEHOLDER_PARSERS = {
    TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
    TokenType.PARAMETER: lambda self: self._parse_parameter(),
    TokenType.COLON: lambda self: (
        self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set(self.ID_VAR_TOKENS)
        else None
    ),
}

# Maps a range/predicate operator token to a parser taking (self, this), where `this`
# is the already-parsed left operand.
RANGE_PARSERS = {
    TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
    TokenType.BETWEEN: lambda self, this: self._parse_between(this),
    TokenType.GLOB: binary_range_parser(exp.Glob),
    TokenType.ILIKE: binary_range_parser(exp.ILike),
    TokenType.IN: lambda self, this: self._parse_in(this),
    TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
    TokenType.IS: lambda self, this: self._parse_is(this),
    TokenType.LIKE: binary_range_parser(exp.Like),
    # Same node as AT_GT, with the operands swapped.
    TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
    TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
    TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
    TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
    TokenType.FOR: lambda self, this: self._parse_comprehension(this),
}

# Maps a property keyword (possibly multi-word) to the routine that parses the property.
# Entries declared with **kwargs forward any keyword arguments they receive.
PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
    "ALLOWED_VALUES": lambda self: self.expression(
        exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
    ),
    "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
    "AUTO": lambda self: self._parse_auto_property(),
    "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
    "BACKUP": lambda self: self.expression(
        exp.BackupProperty, this=self._parse_var(any_token=True)
    ),
    "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
    "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
    "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
    "CHECKSUM": lambda self: self._parse_checksum(),
    "CLUSTER BY": lambda self: self._parse_cluster(),
    "CLUSTERED": lambda self: self._parse_clustered_by(),
    "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
        exp.CollateProperty, **kwargs
    ),
    "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
    "CONTAINS": lambda self: self._parse_contains_property(),
    "COPY": lambda self: self._parse_copy_property(),
    "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
    "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
    "DEFINER": lambda self: self._parse_definer(),
    # DETERMINISTIC produces the same StabilityProperty("IMMUTABLE") as IMMUTABLE below.
    "DETERMINISTIC": lambda self: self.expression(
        exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
    ),
    "DISTRIBUTED": lambda self: self._parse_distributed_property(),
    "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty),
    "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
    "DISTKEY": lambda self: self._parse_distkey(),
    "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
    "EMPTY": lambda self: self.expression(exp.EmptyProperty),
    "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
    "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
    "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
    "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
    "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
    "FREESPACE": lambda self: self._parse_freespace(),
    "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
    "HEAP": lambda self: self.expression(exp.HeapProperty),
    "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
    "IMMUTABLE": lambda self: self.expression(
        exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
    ),
    "INHERITS": lambda self: self.expression(
        exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
    ),
    "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
    "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
    "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
    "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
    "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
    "LIKE": lambda self: self._parse_create_like(),
    "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
    "LOCK": lambda self: self._parse_locking(),
    "LOCKING": lambda self: self._parse_locking(),
    "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
    "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
    "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
    "MODIFIES": lambda self: self._parse_modifies_property(),
    "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
    "NO": lambda self: self._parse_no_property(),
    "ON": lambda self: self._parse_on_property(),
    "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
    "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
    "PARTITION": lambda self: self._parse_partitioned_of(),
    "PARTITION BY": lambda self: self._parse_partitioned_by(),
    "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
    "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
    "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
    "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
    "READS": lambda self: self._parse_reads_property(),
    "REMOTE": lambda self: self._parse_remote_with_connection(),
    "RETURNS": lambda self: self._parse_returns(),
    "STRICT": lambda self: self.expression(exp.StrictProperty),
    "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
    "ROW": lambda self: self._parse_row(),
    "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
    # `this` is only populated when the keyword is followed by BY.
    "SAMPLE": lambda self: self.expression(
        exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
    ),
    "SECURE": lambda self: self.expression(exp.SecureProperty),
    "SECURITY": lambda self: self._parse_security(),
    "SET": lambda self: self.expression(exp.SetProperty, multi=False),
    "SETTINGS": lambda self: self._parse_settings_property(),
    "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
    "SORTKEY": lambda self: self._parse_sortkey(),
    "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
    "STABLE": lambda self: self.expression(
        exp.StabilityProperty, this=exp.Literal.string("STABLE")
    ),
    "STORED": lambda self: self._parse_stored(),
    "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
    "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
    "TEMP": lambda self: self.expression(exp.TemporaryProperty),
    "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
    "TO": lambda self: self._parse_to_table(),
    "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
    "TRANSFORM": lambda self: self.expression(
        exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
    ),
    "TTL": lambda self: self._parse_ttl(),
    "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
    "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
    "VOLATILE": lambda self: self._parse_volatile_property(),
    "WITH": lambda self: self._parse_with_property(),
}

# Maps a constraint keyword (possibly multi-word) to the routine that parses it.
CONSTRAINT_PARSERS = {
    "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
    "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
    "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
    "CHARACTER SET": lambda self: self.expression(
        exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
    ),
    "CHECK": lambda self: self.expression(
        exp.CheckColumnConstraint,
        this=self._parse_wrapped(self._parse_assignment),
        enforced=self._match_text_seq("ENFORCED"),
    ),
    "COLLATE": lambda self: self.expression(
        exp.CollateColumnConstraint,
        this=self._parse_identifier() or self._parse_column(),
    ),
    "COMMENT": lambda self: self.expression(
        exp.CommentColumnConstraint, this=self._parse_string()
    ),
    "COMPRESS": lambda self: self._parse_compress(),
    "CLUSTERED": lambda self: self.expression(
        exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
    ),
    "NONCLUSTERED": lambda self: self.expression(
        exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
    ),
    "DEFAULT": lambda self: self.expression(
        exp.DefaultColumnConstraint, this=self._parse_bitwise()
    ),
    "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
    "EPHEMERAL": lambda self: self.expression(
        exp.EphemeralColumnConstraint, this=self._parse_bitwise()
    ),
    "EXCLUDE": lambda self: self.expression(
        exp.ExcludeColumnConstraint, this=self._parse_index_params()
    ),
    "FOREIGN KEY": lambda self: self._parse_foreign_key(),
    "FORMAT": lambda self: self.expression(
        exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
    ),
    "GENERATED": lambda self: self._parse_generated_as_identity(),
    "IDENTITY": lambda self: self._parse_auto_increment(),
    "INLINE": lambda self: self._parse_inline(),
    "LIKE": lambda self: self._parse_create_like(),
    "NOT": lambda self: self._parse_not_constraint(),
    # A bare NULL is represented as a NotNullColumnConstraint with allow_null=True.
    "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
    # `ON UPDATE <function>` when UPDATE follows; otherwise falls back to an OnProperty.
    "ON": lambda self: (
        self._match(TokenType.UPDATE)
        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
    )
    or self.expression(exp.OnProperty, this=self._parse_id_var()),
    "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
    "PERIOD": lambda self: self._parse_period_for_system_time(),
    "PRIMARY KEY": lambda self: self._parse_primary_key(),
    "REFERENCES": lambda self: self._parse_references(match=False),
    "TITLE": lambda self: self.expression(
        exp.TitleColumnConstraint, this=self._parse_var_or_string()
    ),
    "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
    "UNIQUE": lambda self: self._parse_unique(),
    "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    "WATERMARK": lambda self: self.expression(
        exp.WatermarkColumnConstraint,
        this=self._match(TokenType.FOR) and self._parse_column(),
        expression=self._match(TokenType.ALIAS) and self._parse_disjunction(),
    ),
    "WITH": lambda self: self.expression(
        exp.Properties, expressions=self._parse_wrapped_properties()
    ),
}

# Maps an ALTER action keyword to the routine that parses it.
ALTER_PARSERS = {
    "ADD": lambda self: self._parse_alter_table_add(),
    "AS": lambda self: self._parse_select(),
    "ALTER": lambda self: self._parse_alter_table_alter(),
    "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
    "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
    "DROP": lambda self: self._parse_alter_table_drop(),
    "RENAME": lambda self: self._parse_alter_table_rename(),
    "SET": lambda self: self._parse_alter_table_set(),
    "SWAP": lambda self: self.expression(
        exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
    ),
}

# Maps a keyword to its parser for the nested "ALTER ... ALTER" forms listed here.
ALTER_ALTER_PARSERS = {
    "DISTKEY": lambda self: self._parse_alter_diststyle(),
    "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
    "SORTKEY": lambda self: self._parse_alter_sortkey(),
    "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
}

# Constraint keywords grouped as schema-level unnamed constraints
# (meaning inferred from the name; the consumer is not visible here).
SCHEMA_UNNAMED_CONSTRAINTS = {
    "CHECK",
    "EXCLUDE",
    "FOREIGN KEY",
    "LIKE",
    "PERIOD",
    "PRIMARY KEY",
    "UNIQUE",
    "WATERMARK",
}

# Maps a keyword to the parser of a function-like construct written without parentheses.
NO_PAREN_FUNCTION_PARSERS = {
    "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
    "CASE": lambda self: self._parse_case(),
    "CONNECT_BY_ROOT": lambda self: self.expression(
        exp.ConnectByRoot, this=self._parse_column()
    ),
    "IF": lambda self: self._parse_if(),
}

# Token types grouped as invalid function names (consumer not visible here).
INVALID_FUNC_NAME_TOKENS = {
    TokenType.IDENTIFIER,
    TokenType.STRING,
}

FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

# Maps a function name to a dedicated parse routine for that function.
FUNCTION_PARSERS = {
    "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
    "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil),
    "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
    "DECODE": lambda self: self._parse_decode(),
    "EXTRACT": lambda self: self._parse_extract(),
    "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor),
    "GAP_FILL": lambda self: self._parse_gap_fill(),
    "JSON_OBJECT": lambda self: self._parse_json_object(),
    "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
    "JSON_TABLE": lambda self: self._parse_json_table(),
    "MATCH": lambda self: self._parse_match_against(),
    "NORMALIZE": lambda self: self._parse_normalize(),
    "OPENJSON": lambda self: self._parse_open_json(),
    "OVERLAY": lambda self: self._parse_overlay(),
    "POSITION": lambda self: self._parse_position(),
    "PREDICT": lambda self: self._parse_predict(),
    "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
    "STRING_AGG": lambda self: self._parse_string_agg(),
    "SUBSTRING": lambda self: self._parse_substring(),
    "TRIM": lambda self: self._parse_trim(),
    "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
    "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    "XMLELEMENT": lambda self: self.expression(
        exp.XMLElement,
        this=self._match_text_seq("NAME") and self._parse_id_var(),
        expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression),
    ),
    "XMLTABLE": lambda self: self._parse_xml_table(),
}
# Maps the token that starts a query modifier to a parser returning a
# (key, parsed_node) pair. Several tokens share a key: LIMIT and FETCH both yield
# "limit", FOR and LOCK both yield "locks", TABLE_SAMPLE and USING both yield
# "sample", CONNECT_BY and START_WITH both yield "connect".
QUERY_MODIFIER_PARSERS = {
    TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
    TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
    TokenType.WHERE: lambda self: ("where", self._parse_where()),
    TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
    TokenType.HAVING: lambda self: ("having", self._parse_having()),
    TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
    TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
    TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
    TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
    TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
    TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
    TokenType.FOR: lambda self: ("locks", self._parse_locks()),
    TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
    TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
    TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
    TokenType.CLUSTER_BY: lambda self: (
        "cluster",
        self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
    ),
    TokenType.DISTRIBUTE_BY: lambda self: (
        "distribute",
        self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
    ),
    TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
    # CONNECT_BY calls the same routine as START_WITH, with skip_start_token=True.
    TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
    TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
}

# Maps a keyword handled in SET statements to its parser. The three scope keywords
# forward their own name to `_parse_set_item_assignment`.
SET_PARSERS = {
    "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
    "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
    "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
    "TRANSACTION": lambda self: self._parse_set_transaction(),
}
# SHOW statement parsers; empty in this class.
SHOW_PARSERS: t.Dict[str, t.Callable] = {}

# Maps a data type to a builder taking (self, this, <ignored>) for typed literals.
TYPE_LITERAL_PARSERS = {
    exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
}

# Per-type DataType -> DataType converters; empty in this class.
TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}

# Each OPTIONS_TYPE table below maps a leading keyword to the keyword sequences that
# may follow it; an empty tuple means the keyword has no continuation listed.
TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
    "ISOLATION": (
        ("LEVEL", "REPEATABLE", "READ"),
        ("LEVEL", "READ", "COMMITTED"),
        # The standard keyword is UNCOMMITTED. The misspelled entry that used to be
        # the only one is kept after it so input relying on it keeps its behaviour.
        ("LEVEL", "READ", "UNCOMMITTED"),
        ("LEVEL", "READ", "UNCOMITTED"),
        ("LEVEL", "SERIALIZABLE"),
    ),
    "READ": ("WRITE", "ONLY"),
}

CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
    ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
)
# DO is the only conflict action here that has continuations.
CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

CREATE_SEQUENCE: OPTIONS_TYPE = {
    "SCALE": ("EXTEND", "NOEXTEND"),
    "SHARD": ("EXTEND", "NOEXTEND"),
    "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
    **dict.fromkeys(
        (
            "SESSION",
            "GLOBAL",
            "KEEP",
            "NOKEEP",
            "ORDER",
            "NOORDER",
            "NOCACHE",
            "CYCLE",
            "NOCYCLE",
            "NOMINVALUE",
            "NOMAXVALUE",
            "NOSCALE",
            "NOSHARD",
        ),
        tuple(),
    ),
}

ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

USABLES: OPTIONS_TYPE = dict.fromkeys(
    ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
)

CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
    "TYPE": ("EVOLUTION",),
    **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
}

PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
    "NOT": ("ENFORCED",),
    "MATCH": (
        "FULL",
        "PARTIAL",
        "SIMPLE",
    ),
    "INITIALLY": ("DEFERRED", "IMMEDIATE"),
    "USING": (
        "BTREE",
        "HASH",
    ),
    **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()),
}

INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

CLONE_KEYWORDS = {"CLONE", "COPY"}
HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

# ID_VAR_TOKENS minus ROWS.
WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

# ID_VAR_TOKENS minus ROW, ROWS and PERCENT.
FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

ADD_CONSTRAINT_TOKENS = {
    TokenType.CONSTRAINT,
    TokenType.FOREIGN_KEY,
    TokenType.INDEX,
    TokenType.KEY,
    TokenType.PRIMARY_KEY,
    TokenType.UNIQUE,
}

DISTINCT_TOKENS = {TokenType.DISTINCT}

NULL_TOKENS = {TokenType.NULL}

# ID_VAR_TOKENS minus every token in SET_OPERATIONS.
UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

# Maps the ODBC literal prefix (d / t / ts) to the expression class built for it.
ODBC_DATETIME_LITERALS = {
    "d": exp.Date,
    "t": exp.Time,
    "ts": exp.Timestamp,
}

ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

# The style options for the DESCRIBE statement
DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

# The style options for the ANALYZE statement
ANALYZE_STYLES = {
    "BUFFER_USAGE_LIMIT",
    "FULL",
    "LOCAL",
    "NO_WRITE_TO_BINLOG",
    "SAMPLE",
    "SKIP_LOCKED",
    "VERBOSE",
}

# Maps a keyword that can follow ANALYZE to the routine that parses that clause
ANALYZE_EXPRESSION_PARSERS = {
    "ALL": lambda self: self._parse_analyze_columns(),
    "COMPUTE": lambda self: self._parse_analyze_statistics(),
    "DELETE": lambda self: self._parse_analyze_delete(),
    "DROP": lambda self: self._parse_analyze_histogram(),
    "ESTIMATE": lambda self: self._parse_analyze_statistics(),
    "LIST": lambda self: self._parse_analyze_list(),
    "PREDICATE": lambda self: self._parse_analyze_columns(),
    "UPDATE": lambda self: self._parse_analyze_histogram(),
    "VALIDATE": lambda self: self._parse_analyze_validate(),
}

PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}

AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

OPERATION_MODIFIERS: t.Set[str] = set()

RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"}

MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows)

# Selects exp.Cast (True) vs exp.TryCast (False) for `::`, and is passed to the
# CAST / CONVERT function parsers
STRICT_CAST = True

PREFIXED_PIVOT_COLUMNS = False
IDENTIFY_PIVOT_STRINGS = False

LOG_DEFAULTS_TO_LN = False

# Whether ADD is present for each column added by ALTER TABLE
ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

# Whether the table sample clause expects CSV syntax
TABLESAMPLE_CSV = False

# The default method used for table sampling
DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

# Whether the SET command needs a delimiter (e.g. "=") for assignments
SET_REQUIRES_ASSIGNMENT_DELIMITER = True

# Whether the TRIM function expects the characters to trim as its first argument
TRIM_PATTERN_FIRST = False

# Whether string aliases are supported `SELECT COUNT(*) 'count'`
STRING_ALIASES = False

# Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
MODIFIERS_ATTACHED_TO_SET_OP = True
SET_OP_MODIFIERS = {"order", "limit", "offset"}

# Whether to parse IF statements that aren't followed by a left parenthesis as commands
NO_PAREN_IF_COMMANDS = True

# Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
JSON_ARROWS_REQUIRE_JSON_TYPE = False

# Whether the `:` operator is used to extract a value from a VARIANT column
COLON_IS_VARIANT_EXTRACT = False

# Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
# If this is True and '(' is not found, the keyword will be treated as an identifier
VALUES_FOLLOWED_BY_PAREN = True

# Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
SUPPORTS_IMPLICIT_UNNEST = False

# Whether or not interval spans are supported, e.g. INTERVAL '1' YEAR TO MONTH
INTERVAL_SPANS = True

# Whether a PARTITION clause can follow a table reference
SUPPORTS_PARTITION_SELECTION = False

# Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

# Whether the 'AS' keyword is optional in the CTE definition syntax
OPTIONAL_ALIAS_TOKEN_CTE = True

# Per-instance state: the first four are set in __init__, the rest in reset()
__slots__ = (
    "error_level",
    "error_message_context",
    "max_errors",
    "dialect",
    "sql",
    "errors",
    "_tokens",
    "_index",
    "_curr",
    "_next",
    "_prev",
    "_prev_comments",
)

# Autofilled
SHOW_TRIE: t.Dict = {}
SET_TRIE: t.Dict = {}

def __init__(
    self,
    error_level: t.Optional[ErrorLevel] = None,
    error_message_context: int = 100,
    max_errors: int = 3,
    dialect: DialectType = None,
):
    """
    Stores the parser configuration and initializes the parsing state.

    Args:
        error_level: The error level to use; falls back to ErrorLevel.IMMEDIATE when falsy.
        error_message_context: The number of characters of SQL shown on each side of the
            offending token in error messages.
        max_errors: Passed to `concat_messages` when the collected errors are raised together.
        dialect: The dialect (or dialect name) resolved through `Dialect.get_or_raise`.
    """
    # Imported at call time rather than at module level
    from sqlglot.dialects import Dialect

    self.error_level = error_level or ErrorLevel.IMMEDIATE
    self.error_message_context = error_message_context
    self.max_errors = max_errors
    self.dialect = Dialect.get_or_raise(dialect)
    self.reset()

def reset(self) -> None:
    """Clears the SQL text, the recorded errors and the token cursor state."""
    self.sql = ""
    self.errors = []
    self._tokens = []
    self._index = 0
    self._curr = None
    self._next = None
    self._prev = None
    self._prev_comments = None

def parse(
    self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens and returns a list of syntax trees, one tree
    per parsed SQL statement.

    Args:
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The list of the produced syntax trees.
    """
    return self._parse(
        parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
    )

def parse_into(
    self,
    expression_types: exp.IntoType,
    raw_tokens: t.List[Token],
    sql: t.Optional[str] = None,
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens into a given Expression type. If a collection of Expression
    types is given instead, this method will try to parse the token list into each one
    of them, stopping at the first for which the parsing succeeds.

    Args:
        expression_types: The expression type(s) to try and parse the token list into.
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The target Expression.

    Raises:
        TypeError: If a requested type has no entry in EXPRESSION_PARSERS.
        ParseError: If parsing failed for every requested type (or none was given).
    """
    errors = []
    for expression_type in ensure_list(expression_types):
        parser = self.EXPRESSION_PARSERS.get(expression_type)
        if not parser:
            raise TypeError(f"No parser registered for {expression_type}")

        try:
            return self._parse(parser, raw_tokens, sql)
        except ParseError as e:
            # Record which target type this failure belongs to
            e.errors[0]["into_expression"] = expression_type
            errors.append(e)

    # `errors` is empty when no expression type was supplied; indexing it
    # unconditionally would raise IndexError instead of the intended ParseError.
    raise ParseError(
        f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
        errors=merge_errors(errors),
    ) from (errors[-1] if errors else None)

def _parse(
    self,
    parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
    raw_tokens: t.List[Token],
    sql: t.Optional[str] = None,
) -> t.List[t.Optional[exp.Expression]]:
    """
    Splits the tokens into semicolon-delimited chunks and runs `parse_method` on each.

    Args:
        parse_method: The callable invoked with this parser once per chunk.
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        One result of `parse_method` per chunk, in order.
    """
    self.reset()
    self.sql = sql or ""

    total = len(raw_tokens)
    chunks: t.List[t.List[Token]] = [[]]

    for i, token in enumerate(raw_tokens):
        if token.token_type == TokenType.SEMICOLON:
            # A semicolon carrying comments becomes a chunk of its own
            if token.comments:
                chunks.append([token])

            # Only open a new chunk if more tokens follow the semicolon
            if i < total - 1:
                chunks.append([])
        else:
            chunks[-1].append(token)

    expressions = []

    for tokens in chunks:
        # Start at -1 so that the first _advance() lands on index 0
        self._index = -1
        self._tokens = tokens
        self._advance()

        expressions.append(parse_method(self))

        # Tokens left over after parsing mean the chunk was not fully consumed
        if self._index < len(self._tokens):
            self.raise_error("Invalid expression / Unexpected token")

        self.check_errors()

    return expressions

def check_errors(self) -> None:
    """Logs or raises any found errors, depending on the chosen error level setting."""
    if self.error_level == ErrorLevel.WARN:
        for error in self.errors:
            logger.error(str(error))
    elif self.error_level == ErrorLevel.RAISE and self.errors:
        raise ParseError(
            concat_messages(self.errors, self.max_errors),
            errors=merge_errors(self.errors),
        )

def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
    """
    Appends an error in the list of recorded errors or raises it, depending on the chosen
    error level setting.

    Args:
        message: The error description.
        token: The offending token; defaults to the current token, then the previous
            one, then an empty string token.
    """
    token = token or self._curr or self._prev or Token.string("")
    start = token.start
    end = token.end + 1
    start_context = self.sql[max(start - self.error_message_context, 0) : start]
    highlight = self.sql[start:end]
    end_context = self.sql[end : end + self.error_message_context]

    # \033[4m ... \033[0m are ANSI escapes that underline the offending text
    error = ParseError.new(
        f"{message}. Line {token.line}, Col: {token.col}.\n"
        f" {start_context}\033[4m{highlight}\033[0m{end_context}",
        description=message,
        line=token.line,
        col=token.col,
        start_context=start_context,
        highlight=highlight,
        end_context=end_context,
    )

    if self.error_level == ErrorLevel.IMMEDIATE:
        raise error

    self.errors.append(error)

def expression(
    self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
) -> E:
    """
    Creates a new, validated Expression.

    Args:
        exp_class: The expression class to instantiate.
        comments: An optional list of comments to attach to the expression.
        kwargs: The arguments to set for the expression along with their respective values.

    Returns:
        The target expression.
    """
    instance = exp_class(**kwargs)
    # Explicit comments win; otherwise attach the pending comments of the previous token
    if comments:
        instance.add_comments(comments)
    else:
        self._add_comments(instance)
    return self.validate_expression(instance)

def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
    """Moves the pending previous-token comments onto `expression`, if both exist."""
    if expression and self._prev_comments:
        expression.add_comments(self._prev_comments)
        # Cleared so the same comments are not attached twice
        self._prev_comments = None

def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
    """
    Validates an Expression, making sure that all its mandatory arguments are set.

    Args:
        expression: The expression to validate.
        args: An optional list of items that was used to instantiate the expression, if it's a Func.

    Returns:
        The validated expression.
    """
    if self.error_level != ErrorLevel.IGNORE:
        for error_message in expression.error_messages(args):
            self.raise_error(error_message)

    return expression

def _find_sql(self, start: Token, end: Token) -> str:
    """Returns the slice of the SQL text from the start of `start` to the end of `end`."""
    return self.sql[start.start : end.end + 1]

def _is_connected(self) -> bool:
    """Returns True if the current token starts right after the previous one ends."""
    # bool() keeps the declared return type: a bare `and` chain would return None
    # (or a Token) when either token is missing.
    return bool(self._prev and self._curr and self._prev.end + 1 == self._curr.start)

def _advance(self, times: int = 1) -> None:
    """Moves the cursor by `times` tokens and refreshes the current/next/previous tokens."""
    self._index += times
    self._curr = seq_get(self._tokens, self._index)
    self._next = seq_get(self._tokens, self._index + 1)

    if self._index > 0:
        self._prev = self._tokens[self._index - 1]
        self._prev_comments = self._prev.comments
    else:
        self._prev = None
        self._prev_comments = None

def _retreat(self, index: int) -> None:
    """Moves the cursor to the absolute position `index`; a no-op if already there."""
    if index != self._index:
        self._advance(index - self._index)

def _warn_unsupported(self) -> None:
    if len(self._tokens) <= 1:
        return

    # We use _find_sql because self.sql may comprise multiple chunks,
def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
    """
    Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
    This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
    solve this by setting & resetting the parser state accordingly.

    Args:
        parse_method: The zero-argument parse function to attempt.
        retreat: Whether to rewind to the starting position even when parsing succeeds.

    Returns:
        The value produced by `parse_method`, or None if it raised a `ParseError`.
    """
    index = self._index
    error_level = self.error_level

    # Bind the result up front: if `parse_method` raises something other than ParseError,
    # the `finally` clause below still needs a defined value. Otherwise it would fail with
    # an UnboundLocalError that masks the real exception and skips the error level reset.
    this: t.Optional[T] = None

    # Errors must surface immediately so that a failed attempt can be detected here
    self.error_level = ErrorLevel.IMMEDIATE
    try:
        this = parse_method()
    except ParseError:
        this = None
    finally:
        if not this or retreat:
            self._retreat(index)
        self.error_level = error_level

    return this
# https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
def _parse_ttl(self) -> exp.Expression:
    """
    Parses a MergeTree TTL clause into an `exp.MergeTreeTTL` node.

    The clause is a comma-separated list of TTL entries, followed by whatever `_parse_where`
    and `_parse_group` return. `SET` items are only looked for when a group was parsed.
    """

    def _parse_ttl_action() -> t.Optional[exp.Expression]:
        # Every entry starts with an expression that may be followed by one action keyword
        target = self._parse_bitwise()

        if self._match_text_seq("DELETE"):
            action = {"delete": True}
        elif self._match_text_seq("RECOMPRESS"):
            action = {"recompress": self._parse_bitwise()}
        elif self._match_text_seq("TO", "DISK"):
            action = {"to_disk": self._parse_string()}
        elif self._match_text_seq("TO", "VOLUME"):
            action = {"to_volume": self._parse_string()}
        else:
            # No action keyword follows, so the bare expression is the entry
            return target

        return self.expression(exp.MergeTreeTTLAction, this=target, **action)

    entries = self._parse_csv(_parse_ttl_action)
    where_clause = self._parse_where()
    group_clause = self._parse_group()

    set_items = None
    if group_clause and self._match(TokenType.SET):
        set_items = self._parse_csv(self._parse_set_item)

    return self.expression(
        exp.MergeTreeTTL,
        expressions=entries,
        where=where_clause,
        group=group_clause,
        aggregates=set_items,
    )
def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
    """
    Parses a DROP statement.

    Args:
        exists: Whether IF EXISTS is already known to apply. When truthy, the IF EXISTS
            clause is not looked for in the token stream.

    Returns:
        An `exp.Drop` node, or the result of `_parse_as_command` when the token that follows
        the optional TEMPORARY / MATERIALIZED keywords is not one of `CREATABLES`.
    """
    start = self._prev
    temporary = self._match(TokenType.TEMPORARY)
    materialized = self._match_text_seq("MATERIALIZED")

    if not self._match_set(self.CREATABLES):
        return self._parse_as_command(start)

    # Capture the kind token right away: matching CONCURRENTLY / IF EXISTS below moves
    # `self._prev`, so reading it later would no longer inspect the object kind.
    kind_token = self._prev
    kind = kind_token.text.upper()

    concurrently = self._match_text_seq("CONCURRENTLY")
    if_exists = exists or self._parse_exists()

    if kind == "COLUMN":
        this = self._parse_column()
    else:
        this = self._parse_table_parts(
            schema=True, is_db_reference=kind_token.token_type == TokenType.SCHEMA
        )

    cluster = self._parse_on_property() if self._match(TokenType.ON) else None

    if self._match(TokenType.L_PAREN, advance=False):
        expressions = self._parse_wrapped_csv(self._parse_types)
    else:
        expressions = None

    # CASCADE, CONSTRAINTS and PURGE are matched in this order while the arguments are built
    return self.expression(
        exp.Drop,
        exists=if_exists,
        this=this,
        expressions=expressions,
        kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
        temporary=temporary,
        materialized=materialized,
        cascade=self._match_text_seq("CASCADE"),
        constraints=self._match_text_seq("CONSTRAINTS"),
        purge=self._match_text_seq("PURGE"),
        cluster=cluster,
        concurrently=concurrently,
    )
a statement parser 1874 start = self._prev 1875 1876 replace = ( 1877 start.token_type == TokenType.REPLACE 1878 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1879 or self._match_pair(TokenType.OR, TokenType.ALTER) 1880 ) 1881 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1882 1883 unique = self._match(TokenType.UNIQUE) 1884 1885 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1886 clustered = True 1887 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1888 "COLUMNSTORE" 1889 ): 1890 clustered = False 1891 else: 1892 clustered = None 1893 1894 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1895 self._advance() 1896 1897 properties = None 1898 create_token = self._match_set(self.CREATABLES) and self._prev 1899 1900 if not create_token: 1901 # exp.Properties.Location.POST_CREATE 1902 properties = self._parse_properties() 1903 create_token = self._match_set(self.CREATABLES) and self._prev 1904 1905 if not properties or not create_token: 1906 return self._parse_as_command(start) 1907 1908 concurrently = self._match_text_seq("CONCURRENTLY") 1909 exists = self._parse_exists(not_=True) 1910 this = None 1911 expression: t.Optional[exp.Expression] = None 1912 indexes = None 1913 no_schema_binding = None 1914 begin = None 1915 end = None 1916 clone = None 1917 1918 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1919 nonlocal properties 1920 if properties and temp_props: 1921 properties.expressions.extend(temp_props.expressions) 1922 elif temp_props: 1923 properties = temp_props 1924 1925 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1926 this = self._parse_user_defined_function(kind=create_token.token_type) 1927 1928 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1929 extend_props(self._parse_properties()) 1930 1931 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1932 
extend_props(self._parse_properties()) 1933 1934 if not expression: 1935 if self._match(TokenType.COMMAND): 1936 expression = self._parse_as_command(self._prev) 1937 else: 1938 begin = self._match(TokenType.BEGIN) 1939 return_ = self._match_text_seq("RETURN") 1940 1941 if self._match(TokenType.STRING, advance=False): 1942 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1943 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1944 expression = self._parse_string() 1945 extend_props(self._parse_properties()) 1946 else: 1947 expression = self._parse_user_defined_function_expression() 1948 1949 end = self._match_text_seq("END") 1950 1951 if return_: 1952 expression = self.expression(exp.Return, this=expression) 1953 elif create_token.token_type == TokenType.INDEX: 1954 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1955 if not self._match(TokenType.ON): 1956 index = self._parse_id_var() 1957 anonymous = False 1958 else: 1959 index = None 1960 anonymous = True 1961 1962 this = self._parse_index(index=index, anonymous=anonymous) 1963 elif create_token.token_type in self.DB_CREATABLES: 1964 table_parts = self._parse_table_parts( 1965 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1966 ) 1967 1968 # exp.Properties.Location.POST_NAME 1969 self._match(TokenType.COMMA) 1970 extend_props(self._parse_properties(before=True)) 1971 1972 this = self._parse_schema(this=table_parts) 1973 1974 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1975 extend_props(self._parse_properties()) 1976 1977 self._match(TokenType.ALIAS) 1978 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1979 # exp.Properties.Location.POST_ALIAS 1980 extend_props(self._parse_properties()) 1981 1982 if create_token.token_type == TokenType.SEQUENCE: 1983 expression = self._parse_types() 1984 extend_props(self._parse_properties()) 1985 else: 
def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
    """
    Parses a run of sequence options into a single `exp.SequenceProperties` node.

    Returns:
        The populated node, or None if no token was consumed at all.
    """
    seq = exp.SequenceProperties()
    start_index = self._index
    extra_options = []

    while self._curr:
        # Options may optionally be separated by commas
        self._match(TokenType.COMMA)

        if self._match_text_seq("INCREMENT"):
            self._match_text_seq("BY")
            self._match_text_seq("=")
            seq.set("increment", self._parse_term())
            continue

        if self._match_text_seq("MINVALUE"):
            seq.set("minvalue", self._parse_term())
            continue

        if self._match_text_seq("MAXVALUE"):
            seq.set("maxvalue", self._parse_term())
            continue

        if self._match(TokenType.START_WITH) or self._match_text_seq("START"):
            self._match_text_seq("=")
            seq.set("start", self._parse_term())
            continue

        if self._match_text_seq("CACHE"):
            # T-SQL allows empty CACHE which is initialized dynamically
            seq.set("cache", self._parse_number() or True)
            continue

        if self._match_text_seq("OWNED", "BY"):
            # "OWNED BY NONE" is the default
            if self._match_text_seq("NONE"):
                owner = None
            else:
                owner = self._parse_column()
            seq.set("owned", owner)
            continue

        # Anything else must be one of the keyword-only options, otherwise we are done
        option = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
        if not option:
            break
        extra_options.append(option)

    seq.set("options", extra_options or None)

    if self._index == start_index:
        return None
    return seq
def _parse_property(self) -> t.Optional[exp.Expression]:
    """
    Parses a single property.

    Registered property parsers are tried first (optionally prefixed by DEFAULT), then a
    couple of multi-word properties, and finally a generic `key = value` pair. When no `=`
    follows the candidate key, the position is restored and sequence properties are tried.
    """
    parsers = self.PROPERTY_PARSERS

    if self._match_texts(parsers):
        parse = parsers[self._prev.text.upper()]
        return parse(self)

    if self._match(TokenType.DEFAULT) and self._match_texts(parsers):
        parse = parsers[self._prev.text.upper()]
        return parse(self, default=True)

    if self._match_text_seq("COMPOUND", "SORTKEY"):
        return self._parse_sortkey(compound=True)

    if self._match_text_seq("SQL", "SECURITY"):
        is_definer = self._match_text_seq("DEFINER")
        return self.expression(exp.SqlSecurityProperty, definer=is_definer)

    checkpoint = self._index
    key = self._parse_column()

    if not self._match(TokenType.EQ):
        self._retreat(checkpoint)
        return self._parse_sequence_properties()

    # A dotted key wrapped in exp.Column becomes exp.Dot, a single-part one becomes exp.Var
    if isinstance(key, exp.Column):
        if len(key.parts) > 1:
            key = key.to_dot()
        else:
            key = exp.var(key.name)

    value = self._parse_bitwise() or self._parse_var(any_token=True)

    # A value that was parsed as exp.Column(exp.Identifier()) is turned into exp.Var
    if isinstance(value, exp.Column):
        value = exp.var(value.name)

    return self.expression(exp.Property, this=key, value=value)
def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
    """
    Parses consecutive properties until none can be parsed anymore.

    Args:
        before: If truthy, each property is parsed with `_parse_property_before` instead of
            `_parse_property`.

    Returns:
        An `exp.Properties` node wrapping everything that was parsed, or None if nothing was.
    """
    parse_one = self._parse_property_before if before else self._parse_property
    collected = []

    while True:
        parsed = parse_one()
        if not parsed:
            break

        # A single parser call may produce one property or a list of them
        collected.extend(ensure_list(parsed))

    if not collected:
        return None

    return self.expression(exp.Properties, expressions=collected)
def _parse_system_versioning_property(
    self, with_: bool = False
) -> exp.WithSystemVersioningProperty:
    """
    Parses a SYSTEM_VERSIONING property, e.g. `= ON (HISTORY_TABLE = ..., ...)` or `= OFF`.

    Args:
        with_: Value stored under the node's "with" argument.

    Returns:
        The parsed `exp.WithSystemVersioningProperty` node.
    """
    self._match(TokenType.EQ)
    prop = self.expression(
        exp.WithSystemVersioningProperty,
        **{  # type: ignore
            "on": True,
            "with": with_,
        },
    )

    if self._match_text_seq("OFF"):
        prop.set("on", False)
        return prop

    self._match(TokenType.ON)
    if self._match(TokenType.L_PAREN):
        while self._curr and not self._match(TokenType.R_PAREN):
            index = self._index

            if self._match_text_seq("HISTORY_TABLE", "="):
                prop.set("this", self._parse_table_parts())
            elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
            elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                prop.set("retention_period", self._parse_retention_period())

            self._match(TokenType.COMMA)

            if self._index == index:
                # Unrecognized option: nothing was consumed, so iterating again would never
                # terminate. Stop here and leave the remaining tokens to the caller.
                break

    return prop

def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
    """
    Parses a DATA_DELETION property, e.g. `= ON (FILTER_COLUMN = ..., RETENTION_PERIOD = ...)`.

    The property is considered on unless OFF is given explicitly.

    Returns:
        The parsed `exp.DataDeletionProperty` node.
    """
    self._match(TokenType.EQ)
    on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
    prop = self.expression(exp.DataDeletionProperty, on=on)

    if self._match(TokenType.L_PAREN):
        while self._curr and not self._match(TokenType.R_PAREN):
            index = self._index

            if self._match_text_seq("FILTER_COLUMN", "="):
                prop.set("filter_column", self._parse_column())
            elif self._match_text_seq("RETENTION_PERIOD", "="):
                prop.set("retention_period", self._parse_retention_period())

            self._match(TokenType.COMMA)

            if self._index == index:
                # Unrecognized option: nothing was consumed, so iterating again would never
                # terminate. Stop here and leave the remaining tokens to the caller.
                break

    return prop
# https://dev.mysql.com/doc/refman/8.0/en/create-view.html
def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
    """
    Parses a `DEFINER = user@host` property.

    Returns:
        An `exp.DefinerProperty` whose `this` is the "user@host" string, or None when either
        the user or the host part could not be parsed.
    """
    self._match(TokenType.EQ)

    user = self._parse_id_var()
    self._match(TokenType.PARAMETER)

    host = self._parse_id_var()
    if not host and self._match(TokenType.MOD):
        # The host may be given as a MOD token instead of an identifier, so use its text
        host = self._prev.text

    if user and host:
        return exp.DefinerProperty(this=f"{user}@{host}")

    return None
def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
    """
    Parses a comma-separated list of ordered expressions into an `exp.Cluster` node.

    Args:
        wrapped: If truthy, the list is parsed with `_parse_wrapped_csv` instead of
            `_parse_csv`.
    """
    parse_list = self._parse_wrapped_csv if wrapped else self._parse_csv
    ordered = parse_list(self._parse_ordered)
    return self.expression(exp.Cluster, expressions=ordered)
def _parse_locking(self) -> exp.LockingProperty:
    """
    Parses a LOCKING property: an optional object kind and name, an optional FOR / IN
    keyword, an optional lock type and an optional OVERRIDE keyword.

    Every part that is not present in the token stream is stored as None (or as the falsy
    match result in the case of OVERRIDE).
    """
    kind = None
    for token_type, label in (
        (TokenType.TABLE, "TABLE"),
        (TokenType.VIEW, "VIEW"),
        (TokenType.ROW, "ROW"),
    ):
        if self._match(token_type):
            kind = label
            break
    else:
        # DATABASE is matched by its text rather than by a token type
        if self._match_text_seq("DATABASE"):
            kind = "DATABASE"

    # Only these kinds are followed by an object name
    target = self._parse_table_parts() if kind in ("DATABASE", "TABLE", "VIEW") else None

    for_or_in = None
    if self._match(TokenType.FOR):
        for_or_in = "FOR"
    elif self._match(TokenType.IN):
        for_or_in = "IN"

    lock_type = None
    if self._match_text_seq("ACCESS"):
        lock_type = "ACCESS"
    elif self._match_texts(("EXCL", "EXCLUSIVE")):
        # Both spellings are normalized to the long form
        lock_type = "EXCLUSIVE"
    else:
        for candidate in ("SHARE", "READ", "WRITE", "CHECKSUM"):
            if self._match_text_seq(candidate):
                lock_type = candidate
                break

    override = self._match_text_seq("OVERRIDE")

    return self.expression(
        exp.LockingProperty,
        this=target,
        kind=kind,
        for_or_in=for_or_in,
        lock_type=lock_type,
        override=override,
    )
# https://www.postgresql.org/docs/current/sql-createtable.html
def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
    """
    Parses a `PARTITION OF <table> { DEFAULT | FOR VALUES <bound spec> }` property.

    Returns:
        The parsed `exp.PartitionedOfProperty`, or None (after stepping back one token) if
        the next token is not OF.
    """
    if not self._match_text_seq("OF"):
        self._retreat(self._index - 1)
        return None

    this = self._parse_table(schema=True)

    # Start from None: `raise_error` only raises under ErrorLevel.IMMEDIATE and records the
    # error otherwise, in which case execution continues and the name must still be bound.
    expression: t.Optional[exp.Var | exp.PartitionBoundSpec] = None

    if self._match(TokenType.DEFAULT):
        expression = exp.var("DEFAULT")
    elif self._match_text_seq("FOR", "VALUES"):
        expression = self._parse_partition_bound_spec()
    else:
        self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

    return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)
t.Optional[exp.Expression]: 2627 if self._match_text_seq("PRIMARY", "INDEX"): 2628 return exp.NoPrimaryIndexProperty() 2629 if self._match_text_seq("SQL"): 2630 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2631 return None 2632 2633 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2634 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2635 return exp.OnCommitProperty() 2636 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2637 return exp.OnCommitProperty(delete=True) 2638 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2639 2640 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2641 if self._match_text_seq("SQL", "DATA"): 2642 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2643 return None 2644 2645 def _parse_distkey(self) -> exp.DistKeyProperty: 2646 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2647 2648 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2649 table = self._parse_table(schema=True) 2650 2651 options = [] 2652 while self._match_texts(("INCLUDING", "EXCLUDING")): 2653 this = self._prev.text.upper() 2654 2655 id_var = self._parse_id_var() 2656 if not id_var: 2657 return None 2658 2659 options.append( 2660 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2661 ) 2662 2663 return self.expression(exp.LikeProperty, this=table, expressions=options) 2664 2665 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2666 return self.expression( 2667 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2668 ) 2669 2670 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2671 self._match(TokenType.EQ) 2672 return self.expression( 2673 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2674 ) 2675 2676 def _parse_remote_with_connection(self) -> 
exp.RemoteWithConnectionModelProperty: 2677 self._match_text_seq("WITH", "CONNECTION") 2678 return self.expression( 2679 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2680 ) 2681 2682 def _parse_returns(self) -> exp.ReturnsProperty: 2683 value: t.Optional[exp.Expression] 2684 null = None 2685 is_table = self._match(TokenType.TABLE) 2686 2687 if is_table: 2688 if self._match(TokenType.LT): 2689 value = self.expression( 2690 exp.Schema, 2691 this="TABLE", 2692 expressions=self._parse_csv(self._parse_struct_types), 2693 ) 2694 if not self._match(TokenType.GT): 2695 self.raise_error("Expecting >") 2696 else: 2697 value = self._parse_schema(exp.var("TABLE")) 2698 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2699 null = True 2700 value = None 2701 else: 2702 value = self._parse_types() 2703 2704 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2705 2706 def _parse_describe(self) -> exp.Describe: 2707 kind = self._match_set(self.CREATABLES) and self._prev.text 2708 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2709 if self._match(TokenType.DOT): 2710 style = None 2711 self._retreat(self._index - 2) 2712 2713 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2714 2715 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2716 this = self._parse_statement() 2717 else: 2718 this = self._parse_table(schema=True) 2719 2720 properties = self._parse_properties() 2721 expressions = properties.expressions if properties else None 2722 partition = self._parse_partition() 2723 return self.expression( 2724 exp.Describe, 2725 this=this, 2726 style=style, 2727 kind=kind, 2728 expressions=expressions, 2729 partition=partition, 2730 format=format, 2731 ) 2732 2733 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2734 kind = self._prev.text.upper() 2735 expressions = [] 2736 2737 def 
parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2738 if self._match(TokenType.WHEN): 2739 expression = self._parse_disjunction() 2740 self._match(TokenType.THEN) 2741 else: 2742 expression = None 2743 2744 else_ = self._match(TokenType.ELSE) 2745 2746 if not self._match(TokenType.INTO): 2747 return None 2748 2749 return self.expression( 2750 exp.ConditionalInsert, 2751 this=self.expression( 2752 exp.Insert, 2753 this=self._parse_table(schema=True), 2754 expression=self._parse_derived_table_values(), 2755 ), 2756 expression=expression, 2757 else_=else_, 2758 ) 2759 2760 expression = parse_conditional_insert() 2761 while expression is not None: 2762 expressions.append(expression) 2763 expression = parse_conditional_insert() 2764 2765 return self.expression( 2766 exp.MultitableInserts, 2767 kind=kind, 2768 comments=comments, 2769 expressions=expressions, 2770 source=self._parse_table(), 2771 ) 2772 2773 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2774 comments = [] 2775 hint = self._parse_hint() 2776 overwrite = self._match(TokenType.OVERWRITE) 2777 ignore = self._match(TokenType.IGNORE) 2778 local = self._match_text_seq("LOCAL") 2779 alternative = None 2780 is_function = None 2781 2782 if self._match_text_seq("DIRECTORY"): 2783 this: t.Optional[exp.Expression] = self.expression( 2784 exp.Directory, 2785 this=self._parse_var_or_string(), 2786 local=local, 2787 row_format=self._parse_row_format(match_row=True), 2788 ) 2789 else: 2790 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2791 comments += ensure_list(self._prev_comments) 2792 return self._parse_multitable_inserts(comments) 2793 2794 if self._match(TokenType.OR): 2795 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2796 2797 self._match(TokenType.INTO) 2798 comments += ensure_list(self._prev_comments) 2799 self._match(TokenType.TABLE) 2800 is_function = self._match(TokenType.FUNCTION) 2801 2802 this = ( 2803 
self._parse_table(schema=True, parse_partition=True) 2804 if not is_function 2805 else self._parse_function() 2806 ) 2807 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2808 this.set("alias", self._parse_table_alias()) 2809 2810 returning = self._parse_returning() 2811 2812 return self.expression( 2813 exp.Insert, 2814 comments=comments, 2815 hint=hint, 2816 is_function=is_function, 2817 this=this, 2818 stored=self._match_text_seq("STORED") and self._parse_stored(), 2819 by_name=self._match_text_seq("BY", "NAME"), 2820 exists=self._parse_exists(), 2821 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2822 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2823 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2824 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2825 conflict=self._parse_on_conflict(), 2826 returning=returning or self._parse_returning(), 2827 overwrite=overwrite, 2828 alternative=alternative, 2829 ignore=ignore, 2830 source=self._match(TokenType.TABLE) and self._parse_table(), 2831 ) 2832 2833 def _parse_kill(self) -> exp.Kill: 2834 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2835 2836 return self.expression( 2837 exp.Kill, 2838 this=self._parse_primary(), 2839 kind=kind, 2840 ) 2841 2842 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2843 conflict = self._match_text_seq("ON", "CONFLICT") 2844 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2845 2846 if not conflict and not duplicate: 2847 return None 2848 2849 conflict_keys = None 2850 constraint = None 2851 2852 if conflict: 2853 if self._match_text_seq("ON", "CONSTRAINT"): 2854 constraint = self._parse_id_var() 2855 elif self._match(TokenType.L_PAREN): 2856 conflict_keys = self._parse_csv(self._parse_id_var) 2857 self._match_r_paren() 2858 2859 action = 
self._parse_var_from_options(self.CONFLICT_ACTIONS) 2860 if self._prev.token_type == TokenType.UPDATE: 2861 self._match(TokenType.SET) 2862 expressions = self._parse_csv(self._parse_equality) 2863 else: 2864 expressions = None 2865 2866 return self.expression( 2867 exp.OnConflict, 2868 duplicate=duplicate, 2869 expressions=expressions, 2870 action=action, 2871 conflict_keys=conflict_keys, 2872 constraint=constraint, 2873 where=self._parse_where(), 2874 ) 2875 2876 def _parse_returning(self) -> t.Optional[exp.Returning]: 2877 if not self._match(TokenType.RETURNING): 2878 return None 2879 return self.expression( 2880 exp.Returning, 2881 expressions=self._parse_csv(self._parse_expression), 2882 into=self._match(TokenType.INTO) and self._parse_table_part(), 2883 ) 2884 2885 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2886 if not self._match(TokenType.FORMAT): 2887 return None 2888 return self._parse_row_format() 2889 2890 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2891 index = self._index 2892 with_ = with_ or self._match_text_seq("WITH") 2893 2894 if not self._match(TokenType.SERDE_PROPERTIES): 2895 self._retreat(index) 2896 return None 2897 return self.expression( 2898 exp.SerdeProperties, 2899 **{ # type: ignore 2900 "expressions": self._parse_wrapped_properties(), 2901 "with": with_, 2902 }, 2903 ) 2904 2905 def _parse_row_format( 2906 self, match_row: bool = False 2907 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2908 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2909 return None 2910 2911 if self._match_text_seq("SERDE"): 2912 this = self._parse_string() 2913 2914 serde_properties = self._parse_serde_properties() 2915 2916 return self.expression( 2917 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2918 ) 2919 2920 self._match_text_seq("DELIMITED") 2921 2922 kwargs = {} 2923 2924 
if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2925 kwargs["fields"] = self._parse_string() 2926 if self._match_text_seq("ESCAPED", "BY"): 2927 kwargs["escaped"] = self._parse_string() 2928 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2929 kwargs["collection_items"] = self._parse_string() 2930 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2931 kwargs["map_keys"] = self._parse_string() 2932 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2933 kwargs["lines"] = self._parse_string() 2934 if self._match_text_seq("NULL", "DEFINED", "AS"): 2935 kwargs["null"] = self._parse_string() 2936 2937 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2938 2939 def _parse_load(self) -> exp.LoadData | exp.Command: 2940 if self._match_text_seq("DATA"): 2941 local = self._match_text_seq("LOCAL") 2942 self._match_text_seq("INPATH") 2943 inpath = self._parse_string() 2944 overwrite = self._match(TokenType.OVERWRITE) 2945 self._match_pair(TokenType.INTO, TokenType.TABLE) 2946 2947 return self.expression( 2948 exp.LoadData, 2949 this=self._parse_table(schema=True), 2950 local=local, 2951 overwrite=overwrite, 2952 inpath=inpath, 2953 partition=self._parse_partition(), 2954 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2955 serde=self._match_text_seq("SERDE") and self._parse_string(), 2956 ) 2957 return self._parse_as_command(self._prev) 2958 2959 def _parse_delete(self) -> exp.Delete: 2960 # This handles MySQL's "Multiple-Table Syntax" 2961 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2962 tables = None 2963 if not self._match(TokenType.FROM, advance=False): 2964 tables = self._parse_csv(self._parse_table) or None 2965 2966 returning = self._parse_returning() 2967 2968 return self.expression( 2969 exp.Delete, 2970 tables=tables, 2971 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2972 using=self._match(TokenType.USING) and 
self._parse_table(joins=True), 2973 cluster=self._match(TokenType.ON) and self._parse_on_property(), 2974 where=self._parse_where(), 2975 returning=returning or self._parse_returning(), 2976 limit=self._parse_limit(), 2977 ) 2978 2979 def _parse_update(self) -> exp.Update: 2980 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2981 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2982 returning = self._parse_returning() 2983 return self.expression( 2984 exp.Update, 2985 **{ # type: ignore 2986 "this": this, 2987 "expressions": expressions, 2988 "from": self._parse_from(joins=True), 2989 "where": self._parse_where(), 2990 "returning": returning or self._parse_returning(), 2991 "order": self._parse_order(), 2992 "limit": self._parse_limit(), 2993 }, 2994 ) 2995 2996 def _parse_use(self) -> exp.Use: 2997 return self.expression( 2998 exp.Use, 2999 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3000 this=self._parse_table(schema=False), 3001 ) 3002 3003 def _parse_uncache(self) -> exp.Uncache: 3004 if not self._match(TokenType.TABLE): 3005 self.raise_error("Expecting TABLE after UNCACHE") 3006 3007 return self.expression( 3008 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3009 ) 3010 3011 def _parse_cache(self) -> exp.Cache: 3012 lazy = self._match_text_seq("LAZY") 3013 self._match(TokenType.TABLE) 3014 table = self._parse_table(schema=True) 3015 3016 options = [] 3017 if self._match_text_seq("OPTIONS"): 3018 self._match_l_paren() 3019 k = self._parse_string() 3020 self._match(TokenType.EQ) 3021 v = self._parse_string() 3022 options = [k, v] 3023 self._match_r_paren() 3024 3025 self._match(TokenType.ALIAS) 3026 return self.expression( 3027 exp.Cache, 3028 this=table, 3029 lazy=lazy, 3030 options=options, 3031 expression=self._parse_select(nested=True), 3032 ) 3033 3034 def _parse_partition(self) -> t.Optional[exp.Partition]: 3035 if not 
self._match_texts(self.PARTITION_KEYWORDS): 3036 return None 3037 3038 return self.expression( 3039 exp.Partition, 3040 subpartition=self._prev.text.upper() == "SUBPARTITION", 3041 expressions=self._parse_wrapped_csv(self._parse_assignment), 3042 ) 3043 3044 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3045 def _parse_value_expression() -> t.Optional[exp.Expression]: 3046 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3047 return exp.var(self._prev.text.upper()) 3048 return self._parse_expression() 3049 3050 if self._match(TokenType.L_PAREN): 3051 expressions = self._parse_csv(_parse_value_expression) 3052 self._match_r_paren() 3053 return self.expression(exp.Tuple, expressions=expressions) 3054 3055 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 3056 expression = self._parse_expression() 3057 if expression: 3058 return self.expression(exp.Tuple, expressions=[expression]) 3059 return None 3060 3061 def _parse_projections(self) -> t.List[exp.Expression]: 3062 return self._parse_expressions() 3063 3064 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3065 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3066 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3067 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3068 ) 3069 elif self._match(TokenType.FROM): 3070 from_ = self._parse_from(skip_from_token=True) 3071 # Support parentheses for duckdb FROM-first syntax 3072 select = self._parse_select() 3073 if select: 3074 select.set("from", from_) 3075 this = select 3076 else: 3077 this = exp.select("*").from_(t.cast(exp.From, from_)) 3078 else: 3079 this = ( 3080 self._parse_table() 3081 if table 3082 else self._parse_select(nested=True, parse_set_operation=False) 3083 ) 3084 3085 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3086 # in case a modifier (e.g. 
join) is following 3087 if table and isinstance(this, exp.Values) and this.alias: 3088 alias = this.args["alias"].pop() 3089 this = exp.Table(this=this, alias=alias) 3090 3091 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3092 3093 return this 3094 3095 def _parse_select( 3096 self, 3097 nested: bool = False, 3098 table: bool = False, 3099 parse_subquery_alias: bool = True, 3100 parse_set_operation: bool = True, 3101 ) -> t.Optional[exp.Expression]: 3102 cte = self._parse_with() 3103 3104 if cte: 3105 this = self._parse_statement() 3106 3107 if not this: 3108 self.raise_error("Failed to parse any statement following CTE") 3109 return cte 3110 3111 if "with" in this.arg_types: 3112 this.set("with", cte) 3113 else: 3114 self.raise_error(f"{this.key} does not support CTE") 3115 this = cte 3116 3117 return this 3118 3119 # duckdb supports leading with FROM x 3120 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 3121 3122 if self._match(TokenType.SELECT): 3123 comments = self._prev_comments 3124 3125 hint = self._parse_hint() 3126 3127 if self._next and not self._next.token_type == TokenType.DOT: 3128 all_ = self._match(TokenType.ALL) 3129 distinct = self._match_set(self.DISTINCT_TOKENS) 3130 else: 3131 all_, distinct = None, None 3132 3133 kind = ( 3134 self._match(TokenType.ALIAS) 3135 and self._match_texts(("STRUCT", "VALUE")) 3136 and self._prev.text.upper() 3137 ) 3138 3139 if distinct: 3140 distinct = self.expression( 3141 exp.Distinct, 3142 on=self._parse_value(values=False) if self._match(TokenType.ON) else None, 3143 ) 3144 3145 if all_ and distinct: 3146 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3147 3148 operation_modifiers = [] 3149 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3150 operation_modifiers.append(exp.var(self._prev.text.upper())) 3151 3152 limit = self._parse_limit(top=True) 3153 projections = self._parse_projections() 3154 3155 this = 
self.expression( 3156 exp.Select, 3157 kind=kind, 3158 hint=hint, 3159 distinct=distinct, 3160 expressions=projections, 3161 limit=limit, 3162 operation_modifiers=operation_modifiers or None, 3163 ) 3164 this.comments = comments 3165 3166 into = self._parse_into() 3167 if into: 3168 this.set("into", into) 3169 3170 if not from_: 3171 from_ = self._parse_from() 3172 3173 if from_: 3174 this.set("from", from_) 3175 3176 this = self._parse_query_modifiers(this) 3177 elif (table or nested) and self._match(TokenType.L_PAREN): 3178 this = self._parse_wrapped_select(table=table) 3179 3180 # We return early here so that the UNION isn't attached to the subquery by the 3181 # following call to _parse_set_operations, but instead becomes the parent node 3182 self._match_r_paren() 3183 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3184 elif self._match(TokenType.VALUES, advance=False): 3185 this = self._parse_derived_table_values() 3186 elif from_: 3187 this = exp.select("*").from_(from_.this, copy=False) 3188 elif self._match(TokenType.SUMMARIZE): 3189 table = self._match(TokenType.TABLE) 3190 this = self._parse_select() or self._parse_string() or self._parse_table() 3191 return self.expression(exp.Summarize, this=this, table=table) 3192 elif self._match(TokenType.DESCRIBE): 3193 this = self._parse_describe() 3194 elif self._match_text_seq("STREAM"): 3195 this = self._parse_function() 3196 if this: 3197 this = self.expression(exp.Stream, this=this) 3198 else: 3199 self._retreat(self._index - 1) 3200 else: 3201 this = None 3202 3203 return self._parse_set_operations(this) if parse_set_operation else this 3204 3205 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3206 self._match_text_seq("SEARCH") 3207 3208 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3209 3210 if not kind: 3211 return None 3212 3213 self._match_text_seq("FIRST", "BY") 3214 3215 return self.expression( 3216 
exp.RecursiveWithSearch, 3217 kind=kind, 3218 this=self._parse_id_var(), 3219 expression=self._match_text_seq("SET") and self._parse_id_var(), 3220 using=self._match_text_seq("USING") and self._parse_id_var(), 3221 ) 3222 3223 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3224 if not skip_with_token and not self._match(TokenType.WITH): 3225 return None 3226 3227 comments = self._prev_comments 3228 recursive = self._match(TokenType.RECURSIVE) 3229 3230 last_comments = None 3231 expressions = [] 3232 while True: 3233 cte = self._parse_cte() 3234 if isinstance(cte, exp.CTE): 3235 expressions.append(cte) 3236 if last_comments: 3237 cte.add_comments(last_comments) 3238 3239 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3240 break 3241 else: 3242 self._match(TokenType.WITH) 3243 3244 last_comments = self._prev_comments 3245 3246 return self.expression( 3247 exp.With, 3248 comments=comments, 3249 expressions=expressions, 3250 recursive=recursive, 3251 search=self._parse_recursive_with_search(), 3252 ) 3253 3254 def _parse_cte(self) -> t.Optional[exp.CTE]: 3255 index = self._index 3256 3257 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3258 if not alias or not alias.this: 3259 self.raise_error("Expected CTE to have alias") 3260 3261 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3262 self._retreat(index) 3263 return None 3264 3265 comments = self._prev_comments 3266 3267 if self._match_text_seq("NOT", "MATERIALIZED"): 3268 materialized = False 3269 elif self._match_text_seq("MATERIALIZED"): 3270 materialized = True 3271 else: 3272 materialized = None 3273 3274 cte = self.expression( 3275 exp.CTE, 3276 this=self._parse_wrapped(self._parse_statement), 3277 alias=alias, 3278 materialized=materialized, 3279 comments=comments, 3280 ) 3281 3282 if isinstance(cte.this, exp.Values): 3283 cte.set("this", exp.select("*").from_(exp.alias_(cte.this, "_values", table=True))) 3284 3285 return 
cte 3286 3287 def _parse_table_alias( 3288 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3289 ) -> t.Optional[exp.TableAlias]: 3290 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3291 # so this section tries to parse the clause version and if it fails, it treats the token 3292 # as an identifier (alias) 3293 if self._can_parse_limit_or_offset(): 3294 return None 3295 3296 any_token = self._match(TokenType.ALIAS) 3297 alias = ( 3298 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3299 or self._parse_string_as_identifier() 3300 ) 3301 3302 index = self._index 3303 if self._match(TokenType.L_PAREN): 3304 columns = self._parse_csv(self._parse_function_parameter) 3305 self._match_r_paren() if columns else self._retreat(index) 3306 else: 3307 columns = None 3308 3309 if not alias and not columns: 3310 return None 3311 3312 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3313 3314 # We bubble up comments from the Identifier to the TableAlias 3315 if isinstance(alias, exp.Identifier): 3316 table_alias.add_comments(alias.pop_comments()) 3317 3318 return table_alias 3319 3320 def _parse_subquery( 3321 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3322 ) -> t.Optional[exp.Subquery]: 3323 if not this: 3324 return None 3325 3326 return self.expression( 3327 exp.Subquery, 3328 this=this, 3329 pivots=self._parse_pivots(), 3330 alias=self._parse_table_alias() if parse_alias else None, 3331 sample=self._parse_table_sample(), 3332 ) 3333 3334 def _implicit_unnests_to_explicit(self, this: E) -> E: 3335 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3336 3337 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3338 for i, join in enumerate(this.args.get("joins") or []): 3339 table = join.this 3340 normalized_table = table.copy() 3341 normalized_table.meta["maybe_column"] = True 
3342 normalized_table = _norm(normalized_table, dialect=self.dialect) 3343 3344 if isinstance(table, exp.Table) and not join.args.get("on"): 3345 if normalized_table.parts[0].name in refs: 3346 table_as_column = table.to_column() 3347 unnest = exp.Unnest(expressions=[table_as_column]) 3348 3349 # Table.to_column creates a parent Alias node that we want to convert to 3350 # a TableAlias and attach to the Unnest, so it matches the parser's output 3351 if isinstance(table.args.get("alias"), exp.TableAlias): 3352 table_as_column.replace(table_as_column.this) 3353 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3354 3355 table.replace(unnest) 3356 3357 refs.add(normalized_table.alias_or_name) 3358 3359 return this 3360 3361 def _parse_query_modifiers( 3362 self, this: t.Optional[exp.Expression] 3363 ) -> t.Optional[exp.Expression]: 3364 if isinstance(this, self.MODIFIABLES): 3365 for join in self._parse_joins(): 3366 this.append("joins", join) 3367 for lateral in iter(self._parse_lateral, None): 3368 this.append("laterals", lateral) 3369 3370 while True: 3371 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3372 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3373 key, expression = parser(self) 3374 3375 if expression: 3376 this.set(key, expression) 3377 if key == "limit": 3378 offset = expression.args.pop("offset", None) 3379 3380 if offset: 3381 offset = exp.Offset(expression=offset) 3382 this.set("offset", offset) 3383 3384 limit_by_expressions = expression.expressions 3385 expression.set("expressions", None) 3386 offset.set("expressions", limit_by_expressions) 3387 continue 3388 break 3389 3390 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3391 this = self._implicit_unnests_to_explicit(this) 3392 3393 return this 3394 3395 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3396 start = self._curr 3397 while self._curr: 3398 self._advance() 3399 3400 end = self._tokens[self._index 
- 1] 3401 return exp.Hint(expressions=[self._find_sql(start, end)]) 3402 3403 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3404 return self._parse_function_call() 3405 3406 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3407 start_index = self._index 3408 should_fallback_to_string = False 3409 3410 hints = [] 3411 try: 3412 for hint in iter( 3413 lambda: self._parse_csv( 3414 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3415 ), 3416 [], 3417 ): 3418 hints.extend(hint) 3419 except ParseError: 3420 should_fallback_to_string = True 3421 3422 if should_fallback_to_string or self._curr: 3423 self._retreat(start_index) 3424 return self._parse_hint_fallback_to_string() 3425 3426 return self.expression(exp.Hint, expressions=hints) 3427 3428 def _parse_hint(self) -> t.Optional[exp.Hint]: 3429 if self._match(TokenType.HINT) and self._prev_comments: 3430 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3431 3432 return None 3433 3434 def _parse_into(self) -> t.Optional[exp.Into]: 3435 if not self._match(TokenType.INTO): 3436 return None 3437 3438 temp = self._match(TokenType.TEMPORARY) 3439 unlogged = self._match_text_seq("UNLOGGED") 3440 self._match(TokenType.TABLE) 3441 3442 return self.expression( 3443 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3444 ) 3445 3446 def _parse_from( 3447 self, joins: bool = False, skip_from_token: bool = False 3448 ) -> t.Optional[exp.From]: 3449 if not skip_from_token and not self._match(TokenType.FROM): 3450 return None 3451 3452 return self.expression( 3453 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3454 ) 3455 3456 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3457 return self.expression( 3458 exp.MatchRecognizeMeasure, 3459 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3460 this=self._parse_expression(), 3461 ) 3462 3463 
def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
    """Parse a `MATCH_RECOGNIZE (...)` clause into an `exp.MatchRecognize`.

    The sub-clauses are read in this fixed order, each one optional:
    PARTITION BY, ORDER BY, MEASURES, ONE ROW / ALL ROWS PER MATCH,
    AFTER MATCH SKIP, PATTERN, DEFINE, followed by an optional table alias.

    Returns:
        The parsed node, or None when the current token does not start a
        MATCH_RECOGNIZE clause.
    """
    if not self._match(TokenType.MATCH_RECOGNIZE):
        return None

    def _parse_skip_target() -> str:
        # `_advance_any` can return no token (the original code silenced this with
        # `# type: ignore`), which used to surface as an AttributeError on `.text`.
        token = self._advance_any()
        if not token:
            self.raise_error("Expecting a pattern variable after AFTER MATCH SKIP TO FIRST/LAST")
            return ""
        return token.text

    self._match_l_paren()

    partition = self._parse_partition_by()
    order = self._parse_order()

    measures = (
        self._parse_csv(self._parse_match_recognize_measure)
        if self._match_text_seq("MEASURES")
        else None
    )

    if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
        rows = exp.var("ONE ROW PER MATCH")
    elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
        text = "ALL ROWS PER MATCH"
        if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
            text += " SHOW EMPTY MATCHES"
        elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
            text += " OMIT EMPTY MATCHES"
        elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
            text += " WITH UNMATCHED ROWS"
        rows = exp.var(text)
    else:
        rows = None

    if self._match_text_seq("AFTER", "MATCH", "SKIP"):
        text = "AFTER MATCH SKIP"
        if self._match_text_seq("PAST", "LAST", "ROW"):
            text += " PAST LAST ROW"
        elif self._match_text_seq("TO", "NEXT", "ROW"):
            text += " TO NEXT ROW"
        elif self._match_text_seq("TO", "FIRST"):
            text += f" TO FIRST {_parse_skip_target()}"
        elif self._match_text_seq("TO", "LAST"):
            text += f" TO LAST {_parse_skip_target()}"
        after = exp.var(text)
    else:
        after = None

    if self._match_text_seq("PATTERN"):
        self._match_l_paren()

        if not self._curr:
            self.raise_error("Expecting )", self._curr)

        # The pattern is not parsed structurally: tokens are skipped until the
        # parenthesis opened above is balanced, and the raw SQL is kept verbatim.
        paren = 1
        start = self._curr
        # Bound up front so that `end` exists even if the loop body never runs
        end = self._prev

        while self._curr and paren > 0:
            if self._curr.token_type == TokenType.L_PAREN:
                paren += 1
            if self._curr.token_type == TokenType.R_PAREN:
                paren -= 1

            end = self._prev
            self._advance()

        if paren > 0:
            self.raise_error("Expecting )", self._curr)

        # NOTE(review): `start` can only be falsy here if `raise_error` above returned
        # instead of raising (presumably possible under lenient error levels - confirm).
        pattern = exp.var(self._find_sql(start, end)) if start else None
    else:
        pattern = None

    define = (
        self._parse_csv(self._parse_name_as_expression)
        if self._match_text_seq("DEFINE")
        else None
    )

    self._match_r_paren()

    return self.expression(
        exp.MatchRecognize,
        partition_by=partition,
        order=order,
        measures=measures,
        rows=rows,
        after=after,
        pattern=pattern,
        define=define,
        alias=self._parse_table_alias(),
    )
def _parse_join(
    self, skip_join_token: bool = False, parse_bracket: bool = False
) -> t.Optional[exp.Join]:
    """Parse a single join (comma join, keyword join or APPLY) into an `exp.Join`.

    Args:
        skip_join_token: When True, the join is parsed even if no JOIN token was matched.
        parse_bracket: Forwarded to `_parse_table` for the joined table(s).

    Returns:
        The parsed join, or None if the upcoming tokens do not form a join.
    """
    # Comma join: `FROM a, b`. The table is parsed speculatively via `_try_parse`.
    if self._match(TokenType.COMMA):
        table = self._try_parse(self._parse_table)
        if table:
            return self.expression(exp.Join, this=table)
        return None

    # Remember the position so that the join modifiers can be un-consumed below
    index = self._index
    method, side, kind = self._parse_join_parts()
    hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
    join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)

    if not skip_join_token and not join:
        # The modifiers were not followed by JOIN, so they are given back
        # NOTE(review): `hint` is not reset here even though its token is rewound too - confirm intended
        self._retreat(index)
        kind = None
        method = None
        side = None

    # NOTE(review): the third positional argument is presumably `advance=False`, i.e. these
    # only peek at OUTER/CROSS APPLY and leave the tokens to be consumed later - confirm
    outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
    cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

    if not skip_join_token and not join and not outer_apply and not cross_apply:
        return None

    kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}
    # An ARRAY join may list several comma-separated tables
    if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA):
        kwargs["expressions"] = self._parse_csv(
            lambda: self._parse_table(parse_bracket=parse_bracket)
        )

    if method:
        kwargs["method"] = method.text
    if side:
        kwargs["side"] = side.text
    if kind:
        kwargs["kind"] = kind.text
    if hint:
        kwargs["hint"] = hint

    if self._match(TokenType.MATCH_CONDITION):
        kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

    if self._match(TokenType.ON):
        kwargs["on"] = self._parse_assignment()
    elif self._match(TokenType.USING):
        kwargs["using"] = self._parse_using_identifiers()
    elif (
        not (outer_apply or cross_apply)
        and not isinstance(kwargs["this"], exp.Unnest)
        and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY))
    ):
        # No condition directly after the table: try to read nested joins that are then
        # followed by this join's ON / USING, and rewind if that does not pan out.
        index = self._index
        joins: t.Optional[list] = list(self._parse_joins())

        if joins and self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif joins and self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        else:
            joins = None
            self._retreat(index)

        # The nested joins are attached to the joined table, not to this join
        kwargs["this"].set("joins", joins if joins else None)

    # Comments attached to the modifier tokens are carried over to the join node
    comments = [c for token in (method, side, kind) if token for c in token.comments]
    return self.expression(exp.Join, comments=comments, **kwargs)
def _parse_index(
    self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
) -> t.Optional[exp.Index]:
    """Parse an index definition into an `exp.Index`.

    When `index` is given or `anonymous` is set, only the optional `ON [TABLE]`
    prefix and the target table are read. Otherwise the optional
    UNIQUE / PRIMARY / AMP modifiers, the INDEX token and the index name are
    parsed, and None is returned if the INDEX token is missing.
    """
    unique = primary = amp = table = None

    if index or anonymous:
        self._match(TokenType.ON)
        self._match(TokenType.TABLE)  # hive
        table = self._parse_table_parts(schema=True)
    else:
        unique = self._match(TokenType.UNIQUE)
        primary = self._match_text_seq("PRIMARY")
        amp = self._match_text_seq("AMP")

        if not self._match(TokenType.INDEX):
            return None

        index = self._parse_id_var()

    index_params = self._parse_index_params()
    index_args = {
        "this": index,
        "table": table,
        "unique": unique,
        "primary": primary,
        "amp": amp,
        "params": index_params,
    }
    return self.expression(exp.Index, **index_args)
def _parse_table_parts(
    self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
) -> exp.Table:
    """Parse a dotted table reference (`catalog.db.table`) into an `exp.Table`.

    Args:
        schema: Forwarded to `_parse_table_part` for every part.
        is_db_reference: When True, the parsed parts are shifted up one level so that
            the last part becomes the database and the table itself is left empty.
        wildcard: When True, a `*` directly attached to the last part is folded into
            the table name.

    Returns:
        The table node, with `changes`, `when` and `pivots` set if they were parsed.
    """
    catalog = None
    db = None
    table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

    # Each additional dot pushes the parts one level up: table -> db -> catalog
    while self._match(TokenType.DOT):
        if catalog:
            # This allows nesting the table in arbitrarily many dot expressions if needed
            table = self.expression(
                exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
            )
        else:
            catalog = db
            db = table
            # "" used for tsql FROM a..b case
            table = self._parse_table_part(schema=schema) or ""

    if (
        wildcard
        and self._is_connected()
        and (isinstance(table, exp.Identifier) or not table)
        and self._match(TokenType.STAR)
    ):
        # The star is either appended to the identifier's name or becomes the name itself
        if isinstance(table, exp.Identifier):
            table.args["this"] += "*"
        else:
            table = exp.Identifier(this="*")

    # We bubble up comments from the Identifier to the Table
    comments = table.pop_comments() if isinstance(table, exp.Expression) else None

    if is_db_reference:
        catalog = db
        db = table
        table = None

    if not table and not is_db_reference:
        self.raise_error(f"Expected table name but got {self._curr}")
    if not db and is_db_reference:
        self.raise_error(f"Expected database name but got {self._curr}")

    table = self.expression(
        exp.Table,
        comments=comments,
        this=table,
        db=db,
        catalog=catalog,
    )

    # Optional suffixes, attached to the table only when present
    changes = self._parse_changes()
    if changes:
        table.set("changes", changes)

    at_before = self._parse_historical_data()
    if at_before:
        table.set("when", at_before)

    pivots = self._parse_pivots()
    if pivots:
        table.set("pivots", pivots)

    return table
def _parse_version(self) -> t.Optional[exp.Version]:
    """Parse a TIMESTAMP / VERSION snapshot clause into an `exp.Version`.

    Supported forms after the snapshot token are `FROM|BETWEEN <a> TO|AND <b>`,
    `CONTAINED IN (<a>, ...)`, `ALL` and, as the default, `[AS OF] <type>`.
    Returns None when no snapshot token is present.
    """
    snapshot_kinds = (
        (TokenType.TIMESTAMP_SNAPSHOT, "TIMESTAMP"),
        (TokenType.VERSION_SNAPSHOT, "VERSION"),
    )
    # Lazily tries each snapshot token in order and stops at the first one that matches
    this = next((label for token_type, label in snapshot_kinds if self._match(token_type)), None)
    if this is None:
        return None

    expression: t.Optional[exp.Expression]

    if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
        kind = self._prev.text.upper()
        lower_bound = self._parse_bitwise()
        self._match_texts(("TO", "AND"))
        upper_bound = self._parse_bitwise()
        expression = self.expression(exp.Tuple, expressions=[lower_bound, upper_bound])
    elif self._match_text_seq("CONTAINED", "IN"):
        kind = "CONTAINED IN"
        members = self._parse_wrapped_csv(self._parse_bitwise)
        expression = self.expression(exp.Tuple, expressions=members)
    elif self._match(TokenType.ALL):
        kind, expression = "ALL", None
    else:
        # The AS OF keywords themselves are optional here
        self._match_text_seq("AS", "OF")
        kind = "AS OF"
        expression = self._parse_type()

    return self.expression(exp.Version, this=this, expression=expression, kind=kind)
def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
    """Parse a table sampling clause into an `exp.TableSample`.

    Args:
        as_modifier: When True, `USING SAMPLE` is also accepted as the clause's
            introducer, in addition to the TABLE_SAMPLE token.

    Returns:
        The parsed sample, or None if no sampling clause starts at the current token.
    """
    if not self._match(TokenType.TABLE_SAMPLE) and not (
        as_modifier and self._match_text_seq("USING", "SAMPLE")
    ):
        return None

    bucket_numerator = None
    bucket_denominator = None
    bucket_field = None
    percent = None
    size = None
    seed = None

    method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
    matched_l_paren = self._match(TokenType.L_PAREN)

    if self.TABLESAMPLE_CSV:
        num = None
        expressions = self._parse_csv(self._parse_primary)
    else:
        expressions = None
        num = (
            self._parse_factor()
            if self._match(TokenType.NUMBER, advance=False)
            else self._parse_primary() or self._parse_placeholder()
        )

    if self._match_text_seq("BUCKET"):
        # BUCKET <numerator> OUT OF <denominator> [ON <field>]
        bucket_numerator = self._parse_number()
        self._match_text_seq("OUT", "OF")
        bucket_denominator = self._parse_number()
        self._match(TokenType.ON)
        bucket_field = self._parse_field()
    elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
        percent = num
    elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
        size = num
    else:
        # A bare number is a percentage when the dialect says so
        percent = num

    if matched_l_paren:
        self._match_r_paren()

    if self._match(TokenType.L_PAREN):
        # Trailing `(<method> [, <seed>])`, which overrides a method parsed earlier
        method = self._parse_var(upper=True)
        seed = self._match(TokenType.COMMA) and self._parse_number()
        self._match_r_paren()
    elif self._match_texts(("SEED", "REPEATABLE")):
        seed = self._parse_wrapped(self._parse_number)

    if not method and self.DEFAULT_SAMPLING_METHOD:
        method = exp.var(self.DEFAULT_SAMPLING_METHOD)

    return self.expression(
        exp.TableSample,
        expressions=expressions,
        method=method,
        bucket_numerator=bucket_numerator,
        bucket_denominator=bucket_denominator,
        bucket_field=bucket_field,
        percent=percent,
        size=size,
        seed=seed,
    )
def _parse_pivot(self) -> t.Optional[exp.Pivot]:
    """Parse a parenthesized PIVOT / UNPIVOT clause into an `exp.Pivot`.

    For PIVOT, the names of the output columns are additionally inferred from the
    aggregation aliases and the `IN (...)` values, and stored under `columns`.

    Returns:
        The parsed pivot, or None if the clause is absent or is not followed by an
        opening parenthesis (in which case the parser rewinds to where it started).
    """
    index = self._index
    include_nulls = None

    if self._match(TokenType.PIVOT):
        unpivot = False
    elif self._match(TokenType.UNPIVOT):
        unpivot = True

        # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
        if self._match_text_seq("INCLUDE", "NULLS"):
            include_nulls = True
        elif self._match_text_seq("EXCLUDE", "NULLS"):
            include_nulls = False
    else:
        return None

    if not self._match(TokenType.L_PAREN):
        self._retreat(index)
        return None

    if unpivot:
        expressions = self._parse_csv(self._parse_column)
    else:
        expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

    if not expressions:
        self.raise_error("Failed to parse PIVOT's aggregation list")

    if not self._match(TokenType.FOR):
        self.raise_error("Expecting FOR")

    # Collect `<column> IN (...)` fields until one fails to parse
    fields = []
    while True:
        field = self._try_parse(self._parse_pivot_in)
        if not field:
            break
        fields.append(field)

    default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
        self._parse_bitwise
    )

    group = self._parse_group()

    self._match_r_paren()

    pivot = self.expression(
        exp.Pivot,
        expressions=expressions,
        fields=fields,
        unpivot=unpivot,
        include_nulls=include_nulls,
        default_on_null=default_on_null,
        group=group,
    )

    # An alias is only parsed if this pivot is not directly followed by another one
    if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
        pivot.set("alias", self._parse_table_alias())

    if not unpivot:
        names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

        columns: t.List[exp.Expression] = []
        all_fields = []
        for pivot_field in pivot.fields:
            pivot_field_expressions = pivot_field.expressions

            # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case.
            if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny):
                continue

            all_fields.append(
                [
                    fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                    for fld in pivot_field_expressions
                ]
            )

        if all_fields:
            if names:
                all_fields.append(names)

            # Generate all possible combinations of the pivot columns
            # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US'))
            # generates the product between [[2000, 2010], ['NL', 'US'], ['total']]
            for fld_parts_tuple in itertools.product(*all_fields):
                fld_parts = list(fld_parts_tuple)

                if names and self.PREFIXED_PIVOT_COLUMNS:
                    # Move the "name" to the front of the list
                    fld_parts.insert(0, fld_parts.pop(-1))

                columns.append(exp.to_identifier("_".join(fld_parts)))

        pivot.set("columns", columns)

    return pivot
elif self._match(TokenType.DISTINCT): 4346 elements["all"] = False 4347 4348 while True: 4349 index = self._index 4350 4351 elements["expressions"].extend( 4352 self._parse_csv( 4353 lambda: None 4354 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4355 else self._parse_assignment() 4356 ) 4357 ) 4358 4359 before_with_index = self._index 4360 with_prefix = self._match(TokenType.WITH) 4361 4362 if self._match(TokenType.ROLLUP): 4363 elements["rollup"].append( 4364 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4365 ) 4366 elif self._match(TokenType.CUBE): 4367 elements["cube"].append( 4368 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4369 ) 4370 elif self._match(TokenType.GROUPING_SETS): 4371 elements["grouping_sets"].append( 4372 self.expression( 4373 exp.GroupingSets, 4374 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4375 ) 4376 ) 4377 elif self._match_text_seq("TOTALS"): 4378 elements["totals"] = True # type: ignore 4379 4380 if before_with_index <= self._index <= before_with_index + 1: 4381 self._retreat(before_with_index) 4382 break 4383 4384 if index == self._index: 4385 break 4386 4387 return self.expression(exp.Group, **elements) # type: ignore 4388 4389 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4390 return self.expression( 4391 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4392 ) 4393 4394 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4395 if self._match(TokenType.L_PAREN): 4396 grouping_set = self._parse_csv(self._parse_column) 4397 self._match_r_paren() 4398 return self.expression(exp.Tuple, expressions=grouping_set) 4399 4400 return self._parse_column() 4401 4402 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4403 if not skip_having_token and not self._match(TokenType.HAVING): 4404 return None 4405 return self.expression(exp.Having, 
def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
    """Parse a `[START WITH ...] CONNECT BY [NOCYCLE] ...` clause into an `exp.Connect`.

    Args:
        skip_start_token: When True, no leading START WITH is looked for; it may still
            appear after the CONNECT BY condition.

    Returns:
        The parsed node, or None if `skip_start_token` is False and the clause does
        not begin with START WITH.
    """
    if skip_start_token:
        start = None
    elif self._match(TokenType.START_WITH):
        start = self._parse_assignment()
    else:
        return None

    self._match(TokenType.CONNECT_BY)
    nocycle = self._match_text_seq("NOCYCLE")

    # PRIOR is only registered as a no-paren function while the CONNECT BY condition
    # is being parsed. The previous state of the registry is restored in a `finally`
    # so that a parse error cannot leave the temporary parser registered, and so that
    # a nested CONNECT BY neither drops the outer registration nor fails on a missing key.
    # NOTE(review): the mapping is presumably shared across parser instances - confirm.
    no_paren_parsers = self.NO_PAREN_FUNCTION_PARSERS
    missing = object()
    previous_prior_parser = no_paren_parsers.get("PRIOR", missing)

    no_paren_parsers["PRIOR"] = lambda self: self.expression(
        exp.Prior, this=self._parse_bitwise()
    )
    try:
        connect = self._parse_assignment()
    finally:
        if previous_prior_parser is missing:
            no_paren_parsers.pop("PRIOR", None)
        else:
            no_paren_parsers["PRIOR"] = previous_prior_parser

    if not start and self._match(TokenType.START_WITH):
        start = self._parse_assignment()

    return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)
def _parse_ordered(
    self, parse_method: t.Optional[t.Callable] = None
) -> t.Optional[exp.Ordered]:
    """Parse one ORDER BY term into an `exp.Ordered`.

    The term itself comes from `parse_method` if given, else from `_parse_assignment`.
    It may be followed by ASC / DESC, NULLS FIRST / NULLS LAST and WITH FILL. When the
    null ordering is not spelled out, `nulls_first` is derived from the dialect's
    `NULL_ORDERING` and the sort direction.

    Returns:
        The ordered term, or None if no expression could be parsed.
    """
    this = parse_method() if parse_method else self._parse_assignment()
    if not this:
        return None

    if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL:
        this = exp.var("ALL")

    asc = self._match(TokenType.ASC)
    # `desc` ends up False (rather than None) only when ASC was matched explicitly
    desc = self._match(TokenType.DESC) or (asc and False)

    is_nulls_first = self._match_text_seq("NULLS", "FIRST")
    is_nulls_last = self._match_text_seq("NULLS", "LAST")

    nulls_first = is_nulls_first or False

    if not (is_nulls_first or is_nulls_last):
        null_ordering = self.dialect.NULL_ORDERING
        if null_ordering != "nulls_are_last":
            nulls_are_small = null_ordering == "nulls_are_small"
            # Nulls come first for ascending order when they are small, and for
            # descending order when they are not
            if bool(desc) != nulls_are_small:
                nulls_first = True

    with_fill = None
    if self._match_text_seq("WITH", "FILL"):
        fill_args: t.Dict[str, t.Any] = {}
        fill_args["from"] = self._match(TokenType.FROM) and self._parse_bitwise()
        fill_args["to"] = self._match_text_seq("TO") and self._parse_bitwise()
        fill_args["step"] = self._match_text_seq("STEP") and self._parse_bitwise()
        fill_args["interpolate"] = self._parse_interpolate()
        with_fill = self.expression(exp.WithFill, **fill_args)

    return self.expression(
        exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
    )
def _parse_limit(
    self,
    this: t.Optional[exp.Expression] = None,
    top: bool = False,
    skip_limit_token: bool = False,
) -> t.Optional[exp.Expression]:
    """Parse a LIMIT / TOP clause or, failing that, a FETCH clause.

    Args:
        this: Expression the limit is attached to; returned unchanged when nothing matches.
        top: Match the TOP token instead of the LIMIT token.
        skip_limit_token: Do not try to match the LIMIT / TOP token; parse the limit body
            directly.

    Returns:
        An `exp.Limit`, an `exp.Fetch`, or `this` when neither clause is present.
    """
    limit_token = TokenType.TOP if top else TokenType.LIMIT

    if skip_limit_token or self._match(limit_token):
        comments = self._prev_comments
        limit_options = None

        if top:
            # TOP accepts either a parenthesized term or a bare number
            has_paren = self._match(TokenType.L_PAREN)
            if has_paren:
                expression = self._parse_term()
                self._match_r_paren()
            else:
                expression = self._parse_number()

            limit_options = self._parse_limit_options()
        else:
            expression = self._parse_term()

        offset = None
        if self._match(TokenType.COMMA):
            # `<offset>, <count>` form: the term parsed first was the offset
            offset, expression = expression, self._parse_term()

        return self.expression(
            exp.Limit,
            this=this,
            expression=expression,
            offset=offset,
            comments=comments,
            limit_options=limit_options,
            expressions=self._parse_limit_by(),
        )

    if self._match(TokenType.FETCH):
        if self._match_set((TokenType.FIRST, TokenType.NEXT)):
            direction = self._prev.text.upper()
        else:
            direction = "FIRST"

        count = self._parse_field(tokens=self.FETCH_TOKENS)

        return self.expression(
            exp.Fetch,
            direction=direction,
            count=count,
            limit_options=self._parse_limit_options(),
        )

    return this
def _parse_locks(self) -> t.List[exp.Lock]:
    """Parse consecutive row-locking clauses.

    Recognizes `FOR UPDATE`, `FOR SHARE` and `LOCK IN SHARE MODE`, each optionally followed
    by `OF <tables>` and then one of `NOWAIT`, `WAIT <primary>` or `SKIP LOCKED`.

    Returns:
        One `exp.Lock` per clause, in the order they were parsed; empty when none is present.
    """
    locks = []

    while True:
        for_update = self._match_text_seq("FOR", "UPDATE")
        if not for_update and not (
            self._match_text_seq("FOR", "SHARE")
            or self._match_text_seq("LOCK", "IN", "SHARE", "MODE")
        ):
            return locks

        expressions = (
            self._parse_csv(lambda: self._parse_table(schema=True))
            if self._match_text_seq("OF")
            else None
        )

        # wait: True -> NOWAIT, expression -> WAIT <primary>, False -> SKIP LOCKED, None -> absent
        wait: t.Optional[bool | exp.Expression] = None
        if self._match_text_seq("NOWAIT"):
            wait = True
        elif self._match_text_seq("WAIT"):
            wait = self._parse_primary()
        elif self._match_text_seq("SKIP", "LOCKED"):
            wait = False

        lock = self.expression(
            exp.Lock, update=bool(for_update), expressions=expressions, wait=wait
        )
        locks.append(lock)
def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    """Fold every following set operation onto `this`.

    `parse_set_operation` is called repeatedly, each result becoming the left operand of the
    next call, until it returns a falsy value. When the result is an `exp.SetOperation` and
    `MODIFIERS_ATTACHED_TO_SET_OP` is truthy, the modifiers named in `SET_OP_MODIFIERS` are
    moved from the right-hand operand onto the set operation itself.

    Returns:
        The (possibly nested) set operation, or `this` when no set operation follows.
    """
    setop = self.parse_set_operation(this)
    while setop:
        this = setop
        setop = self.parse_set_operation(this)

    if not (isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP):
        return this

    right_operand = this.expression
    if not right_operand:
        return this

    for arg in self.SET_OP_MODIFIERS:
        modifier = right_operand.args.get(arg)
        if modifier:
            # pop() detaches the modifier from the operand before it is re-attached here
            this.set(arg, modifier.pop())

    return this
def _parse_disjunction(self) -> t.Optional[exp.Expression]:
    """Parse conjunction-level operands joined by the operators in `self.DISJUNCTION`."""
    operand_parser = self._parse_conjunction
    operators = self.DISJUNCTION
    return self._parse_tokens(operand_parser, operators)
def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    """Parse the right-hand side of an IS predicate whose IS token was just consumed.

    Handles `[NOT] DISTINCT FROM <expr>`, `[NOT] JSON [<kind>] [WITH | WITHOUT] [UNIQUE] [KEYS]`
    and `[NOT] <primary or NULL>`.

    Args:
        this: Left-hand operand of the predicate.

    Returns:
        The predicate expression, or None (after retreating to the token preceding the entry
        position) when nothing valid follows.
    """
    is_token_index = self._index - 1
    negated = self._match(TokenType.NOT)

    if self._match_text_seq("DISTINCT", "FROM"):
        # IS NOT DISTINCT FROM is the null-safe equality; IS DISTINCT FROM is its negation
        null_safe_cls = exp.NullSafeEQ if negated else exp.NullSafeNEQ
        return self.expression(null_safe_cls, this=this, expression=self._parse_bitwise())

    if self._match(TokenType.JSON):
        kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper()

        with_keys = None
        if self._match_text_seq("WITH"):
            with_keys = True
        elif self._match_text_seq("WITHOUT"):
            with_keys = False

        unique = self._match(TokenType.UNIQUE)
        self._match_text_seq("KEYS")
        json_args = {"this": kind, "with": with_keys, "unique": unique}
        rhs: t.Optional[exp.Expression] = self.expression(exp.JSON, **json_args)
    else:
        rhs = self._parse_primary() or self._parse_null()
        if not rhs:
            self._retreat(is_token_index)
            return None

    predicate = self.expression(exp.Is, this=this, expression=rhs)
    if negated:
        return self.expression(exp.Not, this=predicate)
    return predicate
def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
    """Parse an INTERVAL expression, normalizing it towards the `INTERVAL '<value>' <unit>` form.

    Args:
        match_interval: When True, the INTERVAL token is required and None is returned if it
            is absent. When False, the token is optional (used by the recursive call below
            for the follow-up `'<value>' <unit>` pairs).

    Returns:
        An `exp.Interval`, an `exp.Add` of intervals when further string/number values
        follow, or None when no interval could be parsed.
    """
    # Saved so the parser can be rewound if what follows is not an interval value
    index = self._index

    if not self._match(TokenType.INTERVAL) and match_interval:
        return None

    if self._match(TokenType.STRING, advance=False):
        this = self._parse_primary()
    else:
        this = self._parse_term()

    # Bail out when there is no value, or the "value" is an unquoted, unqualified column
    # named IS
    if not this or (
        isinstance(this, exp.Column)
        and not this.table
        and not this.this.quoted
        and this.name.upper() == "IS"
    ):
        self._retreat(index)
        return None

    # The unit is a function, or any token read as an upper-cased var, unless the next token
    # is ALIAS
    unit = self._parse_function() or (
        not self._match(TokenType.ALIAS, advance=False)
        and self._parse_var(any_token=True, upper=True)
    )

    # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
    # each INTERVAL expression into this canonical form so it's easy to transpile
    if this and this.is_number:
        this = exp.Literal.string(this.to_py())
    elif this and this.is_string:
        parts = exp.INTERVAL_STRING_RE.findall(this.name)
        if parts and unit:
            # Unconsume the eagerly-parsed unit, since the real unit was part of the string
            unit = None
            self._retreat(self._index - 1)

        # Exactly one match: split the string into its value and its upper-cased unit
        if len(parts) == 1:
            this = exp.Literal.string(parts[0][0])
            unit = self.expression(exp.Var, this=parts[0][1].upper())
    if self.INTERVAL_SPANS and self._match_text_seq("TO"):
        unit = self.expression(
            exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
        )

    interval = self.expression(exp.Interval, this=this, unit=unit)

    # Remember the position so an optionally consumed `+` can be given back below
    index = self._index
    self._match(TokenType.PLUS)

    # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
    if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
        return self.expression(
            exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
        )

    self._retreat(index)
    return interval
def _parse_factor(self) -> t.Optional[exp.Expression]:
    """Parse a left-associative chain of operands joined by the operators in `self.FACTOR`.

    Operands are parsed with `_parse_exponent` when `self.EXPONENT` is truthy and with
    `_parse_unary` otherwise. `exp.Div` nodes additionally record the dialect's
    `TYPED_DIVISION` and `SAFE_DIVISION` settings.

    Returns:
        The parsed expression, or whatever the operand parser returned when no operator follows.
    """
    if self.EXPONENT:
        parse_operand = self._parse_exponent
    else:
        parse_operand = self._parse_unary

    left = parse_operand()

    while self._match_set(self.FACTOR):
        operator_cls = self.FACTOR[self._prev.token_type]
        operator_comments = self._prev_comments
        right = parse_operand()

        if not right and operator_cls is exp.IntDiv and self._prev.text.isalpha():
            # An alphabetic IntDiv token with nothing after it is given back to the token
            # stream instead of being treated as an operator
            self._retreat(self._index - 1)
            break

        left = self.expression(
            operator_cls, this=left, comments=operator_comments, expression=right
        )

        if isinstance(left, exp.Div):
            left.args.update(
                typed=self.dialect.TYPED_DIVISION,
                safe=self.dialect.SAFE_DIVISION,
            )

    return left
self._match_set(self.UNARY_PARSERS): 4977 return self.UNARY_PARSERS[self._prev.token_type](self) 4978 return self._parse_at_time_zone(self._parse_type()) 4979 4980 def _parse_type( 4981 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4982 ) -> t.Optional[exp.Expression]: 4983 interval = parse_interval and self._parse_interval() 4984 if interval: 4985 return interval 4986 4987 index = self._index 4988 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4989 4990 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 4991 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 4992 if isinstance(data_type, exp.Cast): 4993 # This constructor can contain ops directly after it, for instance struct unnesting: 4994 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).* 4995 return self._parse_column_ops(data_type) 4996 4997 if data_type: 4998 index2 = self._index 4999 this = self._parse_primary() 5000 5001 if isinstance(this, exp.Literal): 5002 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 5003 if parser: 5004 return parser(self, this, data_type) 5005 5006 return self.expression(exp.Cast, this=this, to=data_type) 5007 5008 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 5009 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 5010 # 5011 # If the index difference here is greater than 1, that means the parser itself must have 5012 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 5013 # 5014 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 5015 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 5016 # callable in the TYPE_CONVERTERS mapping. 
def _parse_types(
    self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
) -> t.Optional[exp.Expression]:
    """Parse a data type, including nested, parameterized and array forms.

    Args:
        check_func: When True and a parenthesized argument list was parsed (and no time-zone
            suffix was matched afterwards), the type is only accepted if a string literal
            follows; otherwise the parser retreats to the starting position and None is
            returned.
        schema: Forwarded to nested type parsing. When falsy, bracketed values after the type
            may make the parser retreat instead of treating them as part of an array type.
        allow_identifiers: Allow an identifier (VAR token) to be interpreted as a type name,
            either by re-tokenizing it into a type token or, if the dialect supports it, as
            a user-defined type.

    Returns:
        An `exp.DataType`, `exp.PseudoType` or `exp.ObjectIdentifier`; the result of
        `exp.cast` when inline values follow a `<...>` nested type; the result of a
        `TYPE_CONVERTERS` callable when one applies; or None when no type could be parsed.
    """
    # Starting position; most failure paths below retreat to it before returning None
    index = self._index

    this: t.Optional[exp.Expression] = None
    prefix = self._match_text_seq("SYSUDTLIB", ".")

    if not self._match_set(self.TYPE_TOKENS):
        identifier = allow_identifiers and self._parse_id_var(
            any_token=False, tokens=(TokenType.VAR,)
        )
        if isinstance(identifier, exp.Identifier):
            # Re-tokenize the identifier's SQL to see whether the dialect reads it as a type
            tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect))

            if len(tokens) != 1:
                self.raise_error("Unexpected identifier", self._prev)

            if tokens[0].token_type in self.TYPE_TOKENS:
                # Pretend the type token was matched so the code below reads it via self._prev
                self._prev = tokens[0]
            elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                type_name = identifier.name

                # Collect dot-separated parts into a qualified user-defined type name
                while self._match(TokenType.DOT):
                    type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                this = exp.DataType.build(type_name, udt=True)
            else:
                self._retreat(self._index - 1)
                return None
        else:
            return None

    type_token = self._prev.token_type

    if type_token == TokenType.PSEUDO_TYPE:
        return self.expression(exp.PseudoType, this=self._prev.text.upper())

    if type_token == TokenType.OBJECT_IDENTIFIER:
        return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

    # https://materialize.com/docs/sql/types/map/
    if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
        key_type = self._parse_types(
            check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
        )
        if not self._match(TokenType.FARROW):
            self._retreat(index)
            return None

        value_type = self._parse_types(
            check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
        )
        if not self._match(TokenType.R_BRACKET):
            self._retreat(index)
            return None

        return exp.DataType(
            this=exp.DataType.Type.MAP,
            expressions=[key_type, value_type],
            nested=True,
            prefix=prefix,
        )

    nested = type_token in self.NESTED_TYPE_TOKENS
    is_struct = type_token in self.STRUCT_TYPE_TOKENS
    is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
    expressions = None
    # Set once a parenthesized argument list is parsed; consulted by the check_func test below
    maybe_func = False

    if self._match(TokenType.L_PAREN):
        if is_struct:
            expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
        elif nested:
            expressions = self._parse_csv(
                lambda: self._parse_types(
                    check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                )
            )
            # NULLABLE(<type>) is flattened into the inner type with its nullable arg set
            if type_token == TokenType.NULLABLE and len(expressions) == 1:
                this = expressions[0]
                this.set("nullable", True)
                self._match_r_paren()
                return this
        elif type_token in self.ENUM_TYPE_TOKENS:
            expressions = self._parse_csv(self._parse_equality)
        elif is_aggregate:
            func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR, TokenType.ANY)
            )
            if not func_or_ident:
                # NOTE(review): unlike the other failure paths in this method, this one
                # returns without retreating to `index` - confirm this is intentional
                return None
            expressions = [func_or_ident]
            if self._match(TokenType.COMMA):
                expressions.extend(
                    self._parse_csv(
                        lambda: self._parse_types(
                            check_func=check_func,
                            schema=schema,
                            allow_identifiers=allow_identifiers,
                        )
                    )
                )
        else:
            expressions = self._parse_csv(self._parse_type_size)

            # https://docs.snowflake.com/en/sql-reference/data-types-vector
            if type_token == TokenType.VECTOR and len(expressions) == 2:
                expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect)

        if not expressions or not self._match(TokenType.R_PAREN):
            self._retreat(index)
            return None

        maybe_func = True

    values: t.Optional[t.List[exp.Expression]] = None

    # Angle-bracket form of nested types, e.g. STRUCT<...>
    if nested and self._match(TokenType.LT):
        if is_struct:
            expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
        else:
            expressions = self._parse_csv(
                lambda: self._parse_types(
                    check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                )
            )

        if not self._match(TokenType.GT):
            self.raise_error("Expecting >")

        # Inline values directly after the type, in brackets or parentheses
        if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
            values = self._parse_csv(self._parse_assignment)
            if not values and is_struct:
                # No values after a struct type: give the opening token back
                values = None
                self._retreat(self._index - 1)
            else:
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

    if type_token in self.TIMESTAMPS:
        if self._match_text_seq("WITH", "TIME", "ZONE"):
            maybe_func = False
            tz_type = (
                exp.DataType.Type.TIMETZ
                if type_token in self.TIMES
                else exp.DataType.Type.TIMESTAMPTZ
            )
            this = exp.DataType(this=tz_type, expressions=expressions)
        elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
            maybe_func = False
            this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
        elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
            maybe_func = False
    elif type_token == TokenType.INTERVAL:
        unit = self._parse_var(upper=True)
        if unit:
            if self._match_text_seq("TO"):
                unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

            this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
        else:
            this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)

    if maybe_func and check_func:
        # Peek for a string literal after `<type>(...)`; without one the whole parse is
        # abandoned. The peeked literal itself is never consumed.
        index2 = self._index
        peek = self._parse_string()

        if not peek:
            self._retreat(index)
            return None

        self._retreat(index2)

    if not this:
        if self._match_text_seq("UNSIGNED"):
            unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
            if not unsigned_type_token:
                self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

            type_token = unsigned_type_token or type_token

        this = exp.DataType(
            this=exp.DataType.Type[type_token.value],
            expressions=expressions,
            nested=nested,
            prefix=prefix,
        )

        # Empty arrays/structs are allowed
        if values is not None:
            cls = exp.Struct if is_struct else exp.Array
            this = exp.cast(cls(expressions=values), this, copy=False)

    elif expressions:
        this.set("expressions", expressions)

    # https://materialize.com/docs/sql/types/list/#type-name
    while self._match(TokenType.LIST):
        this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True)

    # From here on `index` marks the position just after the base type
    index = self._index

    # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3]
    matched_array = self._match(TokenType.ARRAY)

    # Each iteration wraps `this` in one more ARRAY level
    while self._curr:
        datatype_token = self._prev.token_type
        matched_l_bracket = self._match(TokenType.L_BRACKET)

        if (not matched_l_bracket and not matched_array) or (
            datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET)
        ):
            # Postgres allows casting empty arrays such as ARRAY[]::INT[],
            # not to be confused with the fixed size array parsing
            break

        matched_array = False
        values = self._parse_csv(self._parse_assignment) or None
        if (
            values
            and not schema
            and (
                not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY
            )
        ):
            # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB
            # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type
            self._retreat(index)
            break

        this = exp.DataType(
            this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True
        )
        self._match(TokenType.R_BRACKET)

    # Give a registered converter for this type, if any, the chance to rewrite the result
    if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type):
        converter = self.TYPE_CONVERTERS.get(this.this)
        if converter:
            this = converter(t.cast(exp.DataType, this))

    return this
def _parse_colon_as_variant_extract(
    self, this: t.Optional[exp.Expression]
) -> t.Optional[exp.Expression]:
    """Parse a chain of `:<path>` extractions on `this` into a single `exp.JSONExtract`.

    Each path segment is recorded as the raw SQL between the token after the colon and the
    end of the segment. Casts found on a segment are stripped from the path and re-applied
    around the final extraction.

    Args:
        this: Expression the extraction is applied to.

    Returns:
        The `exp.JSONExtract` (wrapped in the collected casts), or `this` unchanged when no
        path segment was collected.
    """
    pending_casts = []
    path_segments = []
    escape = None

    while self._match(TokenType.COLON):
        segment_start = self._index

        # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True
        key = self._parse_field(any_token=True, tokens=(TokenType.SELECT,))
        path = self._parse_column_ops(key)

        # The cast :: operator has a lower precedence than the extraction operator :, so
        # the casts are peeled off here and re-applied around the whole extraction later
        while isinstance(path, exp.Cast):
            pending_casts.append(path.to)
            path = path.this

        if pending_casts:
            # The segment's text ends right before the first `::` found from its start
            first_dcolon = next(
                offset
                for offset, token in enumerate(self._tokens[segment_start:])
                if token.token_type == TokenType.DCOLON
            )
            segment_end = self._tokens[segment_start + first_dcolon - 1]
        else:
            segment_end = self._prev

        if not path:
            continue

        # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as
        # it'll roundtrip to a string literal in GET_PATH
        if isinstance(path, exp.Identifier) and path.quoted:
            escape = True

        path_segments.append(self._find_sql(self._tokens[segment_start], segment_end))

    if not path_segments:
        return this

    # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while
    # Databricks transforms it back to the colon/dot notation
    joined_path = exp.Literal.string(".".join(path_segments))
    json_path_expr = self.dialect.to_json_path(joined_path)

    if json_path_expr:
        json_path_expr.set("escape", escape)

    this = self.expression(
        exp.JSONExtract,
        this=this,
        expression=json_path_expr,
        variant_extract=True,
    )

    # Casts were collected outermost-first, so popping applies the innermost one first
    while pending_casts:
        this = self.expression(exp.Cast, this=this, to=pending_casts.pop())

    return this
self.COLUMN_OPERATORS.get(op_token) 5415 5416 if op_token in (TokenType.DCOLON, TokenType.DOTCOLON): 5417 field = self._parse_dcolon() 5418 if not field: 5419 self.raise_error("Expected type") 5420 elif op and self._curr: 5421 field = self._parse_column_reference() or self._parse_bracket() 5422 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5423 field = self._parse_column_ops(field) 5424 else: 5425 field = self._parse_field(any_token=True, anonymous_func=True) 5426 5427 if isinstance(field, (exp.Func, exp.Window)) and this: 5428 # BQ & snowflake allow function calls like x.y.count(...), SAFE.SUBSTR(...) etc 5429 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5430 this = exp.replace_tree( 5431 this, 5432 lambda n: ( 5433 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5434 if n.table 5435 else n.this 5436 ) 5437 if isinstance(n, exp.Column) 5438 else n, 5439 ) 5440 5441 if op: 5442 this = op(self, this, field) 5443 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5444 this = self.expression( 5445 exp.Column, 5446 comments=this.comments, 5447 this=field, 5448 table=this.this, 5449 db=this.args.get("table"), 5450 catalog=this.args.get("db"), 5451 ) 5452 elif isinstance(field, exp.Window): 5453 # Move the exp.Dot's to the window's function 5454 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5455 field.set("this", window_func) 5456 this = field 5457 else: 5458 this = self.expression(exp.Dot, this=this, expression=field) 5459 5460 if field and field.comments: 5461 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5462 5463 this = self._parse_bracket(this) 5464 5465 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5466 5467 def _parse_primary(self) -> t.Optional[exp.Expression]: 5468 if self._match_set(self.PRIMARY_PARSERS): 5469 token_type = 
def _parse_primary(self) -> t.Optional[exp.Expression]:
    """
    Parse a primary expression.

    Handles, in order: a token registered in `PRIMARY_PARSERS`, a number literal written
    as `.N`, and a parenthesized query or expression list.

    Returns:
        The parsed expression, or None if the current token starts none of the above.
    """
    if self._match_set(self.PRIMARY_PARSERS):
        token = self._prev
        primary = self.PRIMARY_PARSERS[token.token_type](self, token)

        if token.token_type != TokenType.STRING:
            return primary

        # Consecutive string literals are collected into a single Concat
        parts = [primary]
        while self._match(TokenType.STRING):
            parts.append(exp.Literal.string(self._prev.text))

        if len(parts) > 1:
            return self.expression(exp.Concat, expressions=parts)
        return primary

    if self._match_pair(TokenType.DOT, TokenType.NUMBER):
        return exp.Literal.number(f"0.{self._prev.text}")

    if not self._match(TokenType.L_PAREN):
        return None

    comments = self._prev_comments
    query = self._parse_select()
    expressions = [query] if query else self._parse_expressions()

    this = self._parse_query_modifiers(seq_get(expressions, 0))

    if not this and self._match(TokenType.R_PAREN, advance=False):
        this = self.expression(exp.Tuple)
    elif isinstance(this, exp.UNWRAPPED_QUERIES):
        this = self._parse_subquery(this=this, parse_alias=False)
    elif isinstance(this, exp.Subquery):
        this = self._parse_subquery(this=self._parse_set_operations(this), parse_alias=False)
    elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
        this = self.expression(exp.Tuple, expressions=expressions)
    else:
        this = self.expression(exp.Paren, this=this)

    if this:
        this.add_comments(comments)

    self._match_r_paren(expression=this)
    return this
def _parse_field(
    self,
    any_token: bool = False,
    tokens: t.Optional[t.Collection[TokenType]] = None,
    anonymous_func: bool = False,
) -> t.Optional[exp.Expression]:
    """
    Parse a field: a primary expression, a function call or, failing both, an identifier.

    Args:
        any_token: Forwarded to `_parse_function` and `_parse_id_var`.
        tokens: Forwarded to `_parse_id_var`.
        anonymous_func: If True, a function call is attempted before a primary expression
            (and is parsed with `anonymous=True`); otherwise the primary is attempted first.

    Returns:
        The first non-empty result of the attempted parsers.
    """

    def _function() -> t.Optional[exp.Expression]:
        return self._parse_function(anonymous=anonymous_func, any_token=any_token)

    if anonymous_func:
        attempts = (_function, self._parse_primary)
    else:
        attempts = (self._parse_primary, _function)

    for attempt in attempts:
        field = attempt()
        if field:
            return field

    return self._parse_id_var(any_token=any_token, tokens=tokens)

def _parse_function(
    self,
    functions: t.Optional[t.Dict[str, t.Callable]] = None,
    anonymous: bool = False,
    optional_parens: bool = True,
    any_token: bool = False,
) -> t.Optional[exp.Expression]:
    """
    Parse a function call, optionally wrapped in the `{fn ...}` escape syntax.

    All arguments are forwarded unchanged to `_parse_function_call`.

    Returns:
        Whatever `_parse_function_call` returns for the (unwrapped) call.
    """
    # This allows us to also parse the {fn function} syntax (Snowflake, MySQL support this)
    # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
    fn_syntax = bool(
        self._match(TokenType.L_BRACE, advance=False)
        and self._next
        and self._next.text.upper() == "FN"
    )
    if fn_syntax:
        # Skip both the opening brace and the FN keyword
        self._advance(2)

    func = self._parse_function_call(
        functions=functions,
        anonymous=anonymous,
        optional_parens=optional_parens,
        any_token=any_token,
    )

    if fn_syntax:
        self._match(TokenType.R_BRACE)

    return func
def _parse_function_call(
    self,
    functions: t.Optional[t.Dict[str, t.Callable]] = None,
    anonymous: bool = False,
    optional_parens: bool = True,
    any_token: bool = False,
) -> t.Optional[exp.Expression]:
    """
    Parse a function call that starts at the current token.

    Args:
        functions: Mapping from upper-cased function name to its builder. Defaults to
            `self.FUNCTIONS`.
        anonymous: If True, `FUNCTION_PARSERS` and the `functions` builders are bypassed and
            the call is built as `exp.Anonymous`.
        optional_parens: If True, functions registered in `NO_PAREN_FUNCTION_PARSERS` /
            `NO_PAREN_FUNCTIONS` are accepted without parentheses.
        any_token: If True, any token outside `RESERVED_TOKENS` may be the function name;
            otherwise the name token has to be in `FUNC_TOKENS`.

    Returns:
        The parsed call passed through `_parse_window`, or None if no call starts here.
    """
    if not self._curr:
        return None

    comments = self._curr.comments
    token_type = self._curr.token_type
    this = self._curr.text
    upper = this.upper()

    parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
    if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
        self._advance()
        return self._parse_window(parser(self))

    if not self._next or self._next.token_type != TokenType.L_PAREN:
        if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
            self._advance()
            return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

        return None

    if any_token:
        if token_type in self.RESERVED_TOKENS:
            return None
    elif token_type not in self.FUNC_TOKENS:
        return None

    # Skip the function name and the opening parenthesis
    self._advance(2)

    parser = self.FUNCTION_PARSERS.get(upper)
    if parser and not anonymous:
        this = parser(self)
    else:
        subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

        # `self._curr` is None when the input ends right after the opening parenthesis, so it
        # has to be checked before its token type is inspected
        if (
            subquery_predicate
            and self._curr
            and self._curr.token_type in (TokenType.SELECT, TokenType.WITH)
        ):
            this = self.expression(
                subquery_predicate, comments=comments, this=self._parse_select()
            )
            self._match_r_paren()
            return this

        if functions is None:
            functions = self.FUNCTIONS

        function = functions.get(upper)
        known_function = function and not anonymous

        alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS
        args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

        post_func_comments = self._curr and self._curr.comments
        if known_function and post_func_comments:
            # If the user-inputted comment "/* sqlglot.anonymous */" is following the function
            # call we'll construct it as exp.Anonymous, even if it's "known"
            if any(
                comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS)
                for comment in post_func_comments
            ):
                known_function = False

        if alias and known_function:
            args = self._kv_to_prop_eq(args)

        if known_function:
            func_builder = t.cast(t.Callable, function)

            # Builders that mention `dialect` among their variable names receive the dialect
            if "dialect" in func_builder.__code__.co_varnames:
                func = func_builder(args, dialect=self.dialect)
            else:
                func = func_builder(args)

            func = self.validate_expression(func, args)
            if self.dialect.PRESERVE_ORIGINAL_NAMES:
                func.meta["name"] = this

            this = func
        else:
            if token_type == TokenType.IDENTIFIER:
                this = exp.Identifier(this=this, quoted=True)
            this = self.expression(exp.Anonymous, this=this, expressions=args)

    if isinstance(this, exp.Expression):
        this.add_comments(comments)

    self._match_r_paren(this)
    return self._parse_window(this)
def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression:
    """Return `expression` unchanged; `index` is not used by this implementation."""
    return expression

def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
    """
    Normalize key-value style arguments into `exp.PropertyEQ` nodes.

    Args:
        expressions: The arguments to normalize.

    Returns:
        A new list in which every instance of `KEY_VALUE_DEFINITIONS` has been turned into
        a `PropertyEQ`, and every other item has been passed through `_to_prop_eq`.
    """
    result = []

    for position, item in enumerate(expressions):
        if not isinstance(item, self.KEY_VALUE_DEFINITIONS):
            result.append(self._to_prop_eq(item, position))
            continue

        if isinstance(item, exp.Alias):
            item = self.expression(
                exp.PropertyEQ, this=item.args.get("alias"), expression=item.this
            )

        if not isinstance(item, exp.PropertyEQ):
            item = self.expression(
                exp.PropertyEQ,
                this=exp.to_identifier(item.this.name),
                expression=item.expression,
            )

        # The key of a PropertyEQ is kept as the column's inner node, not as a Column
        if isinstance(item.this, exp.Column):
            item.this.replace(item.this.this)

        result.append(item)

    return result

def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]:
    """Parse the expression of a user defined function by delegating to `_parse_statement`."""
    return self._parse_statement()

def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
    """Parse one function parameter as a column definition without computed-column support."""
    name = self._parse_id_var()
    return self._parse_column_def(this=name, computed_column=False)

def _parse_user_defined_function(
    self, kind: t.Optional[TokenType] = None
) -> t.Optional[exp.Expression]:
    """
    Parse the name of a user defined function and, if present, its parameter list.

    Args:
        kind: Not used by this implementation.

    Returns:
        The parsed table parts if no parenthesis follows, otherwise an
        `exp.UserDefinedFunction` wrapping the name and its parameters.
    """
    this = self._parse_table_parts(schema=True)

    if self._match(TokenType.L_PAREN):
        parameters = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=parameters, wrapped=True
        )

    return this

def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
    """
    Parse the primary expression following `token` as an introducer.

    Returns:
        An `exp.Introducer` if a primary expression follows, otherwise an `exp.Identifier`
        built from the text of `token`.
    """
    literal = self._parse_primary()
    if not literal:
        return self.expression(exp.Identifier, this=token.text)

    return self.expression(exp.Introducer, this=token.text, expression=literal)
def _parse_session_parameter(self) -> exp.SessionParameter:
    """
    Parse a session parameter, optionally written as `kind.name`.

    Returns:
        An `exp.SessionParameter`; `kind` is the name of the part before the dot, if any.
    """
    this = self._parse_id_var() or self._parse_primary()
    kind = None

    if this and self._match(TokenType.DOT):
        # What was parsed so far is the qualifier, the actual parameter follows the dot
        kind = this.name
        this = self._parse_var() or self._parse_primary()

    return self.expression(exp.SessionParameter, this=this, kind=kind)

def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
    """Parse a single lambda argument as an identifier."""
    return self._parse_id_var()

def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
    """
    Parse a lambda, falling back to a regular function argument.

    Args:
        alias: Forwarded to `_parse_select_or_expression` for the fallback.

    Returns:
        The result of the matching `LAMBDAS` parser or, if no lambda operator follows the
        argument list, the argument with any trailing RESPECT / IGNORE NULLS, HAVING MAX,
        ORDER BY and LIMIT applied.
    """
    start = self._index

    if self._match(TokenType.L_PAREN):
        params = t.cast(
            t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
        )

        if not self._match(TokenType.R_PAREN):
            self._retreat(start)
    else:
        params = [self._parse_lambda_arg()]

    if self._match_set(self.LAMBDAS):
        return self.LAMBDAS[self._prev.token_type](self, params)

    # Not a lambda after all: start over and parse an ordinary argument
    self._retreat(start)

    this: t.Optional[exp.Expression]

    if self._match(TokenType.DISTINCT):
        this = self.expression(exp.Distinct, expressions=self._parse_csv(self._parse_assignment))
    else:
        this = self._parse_select_or_expression(alias=alias)

    this = self._parse_respect_or_ignore_nulls(this)
    this = self._parse_having_max(this)
    this = self._parse_order(this)
    return self._parse_limit(this)
def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
    """
    Parse a parenthesized list of constraints / field definitions into an `exp.Schema`.

    Args:
        this: The expression the schema belongs to.

    Returns:
        `this` unchanged if no schema follows, otherwise an `exp.Schema` wrapping it.
    """
    start = self._index
    if not self._match(TokenType.L_PAREN):
        return this

    # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (expr),
    # expr can be of both types
    if self._match_set(self.SELECT_START_TOKENS):
        self._retreat(start)
        return this

    def _definition() -> t.Optional[exp.Expression]:
        return self._parse_constraint() or self._parse_field_def()

    definitions = self._parse_csv(_definition)
    self._match_r_paren()
    return self.expression(exp.Schema, this=this, expressions=definitions)

def _parse_field_def(self) -> t.Optional[exp.Expression]:
    """Parse a field (any token allowed) followed by its column definition."""
    field = self._parse_field(any_token=True)
    return self._parse_column_def(field)
def _parse_column_def(
    self, this: t.Optional[exp.Expression], computed_column: bool = True
) -> t.Optional[exp.Expression]:
    """
    Parse the type and constraints that follow a column name.

    Args:
        this: The already parsed column name.
        computed_column: If False, an alias token right after the name is skipped before
            the type is parsed.

    Returns:
        `this` itself if neither a type nor a constraint follows, otherwise an
        `exp.ColumnDef`.
    """
    # column defs are not really columns, they're identifiers
    if isinstance(this, exp.Column):
        this = this.this

    if not computed_column:
        self._match(TokenType.ALIAS)

    kind = self._parse_types(schema=True)

    if self._match_text_seq("FOR", "ORDINALITY"):
        return self.expression(exp.ColumnDef, this=this, ordinality=True)

    constraints: t.List[exp.Expression] = []

    if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
        ("ALIAS", "MATERIALIZED")
    ):
        persisted = self._prev.text.upper() == "MATERIALIZED"
        computed = self._parse_assignment()
        persisted = persisted or self._match_text_seq("PERSISTED")
        not_null = self._match_pair(TokenType.NOT, TokenType.NULL)
        constraint_kind = exp.ComputedColumnConstraint(
            this=computed, persisted=persisted, not_null=not_null
        )
        constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind))
    elif (
        kind
        and self._match(TokenType.ALIAS, advance=False)
        and (
            not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
            or (self._next and self._next.token_type == TokenType.L_PAREN)
        )
    ):
        self._advance()
        transform = exp.TransformColumnConstraint(this=self._parse_disjunction())
        constraints.append(self.expression(exp.ColumnConstraint, kind=transform))

    # Collect column constraints until none is left
    constraint = self._parse_column_constraint()
    while constraint:
        constraints.append(constraint)
        constraint = self._parse_column_constraint()

    if not kind and not constraints:
        return this

    return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

def _parse_auto_increment(
    self,
) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
    """
    Parse the optional `(start, increment)` or `START ... INCREMENT ...` of an auto increment.

    Returns:
        An `exp.GeneratedAsIdentityColumnConstraint` if both a start and an increment were
        parsed, otherwise a plain `exp.AutoIncrementColumnConstraint`.
    """
    start = None
    increment = None

    if self._match(TokenType.L_PAREN, advance=False):
        values = self._parse_wrapped_csv(self._parse_bitwise)
        start, increment = seq_get(values, 0), seq_get(values, 1)
    elif self._match_text_seq("START"):
        start = self._parse_bitwise()
        self._match_text_seq("INCREMENT")
        increment = self._parse_bitwise()

    if not (start and increment):
        return exp.AutoIncrementColumnConstraint()

    return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment, this=False)

def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
    """
    Parse `REFRESH` and its value into an `exp.AutoRefreshProperty`.

    Returns:
        The property, or None (after stepping back one token) if `REFRESH` does not follow.
    """
    if self._match_text_seq("REFRESH"):
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    self._retreat(self._index - 1)
    return None

def _parse_compress(self) -> exp.CompressColumnConstraint:
    """Parse the value of a compress constraint: a wrapped list or a single expression."""
    if self._match(TokenType.L_PAREN, advance=False):
        this = self._parse_wrapped_csv(self._parse_bitwise)
    else:
        this = self._parse_bitwise()

    return self.expression(exp.CompressColumnConstraint, this=this)
def _parse_generated_as_identity(
    self,
) -> (
    exp.GeneratedAsIdentityColumnConstraint
    | exp.ComputedColumnConstraint
    | exp.GeneratedAsRowColumnConstraint
):
    """
    Parse what follows `GENERATED`: `BY DEFAULT` / `ALWAYS`, then `AS ROW ...` or an
    optional `IDENTITY` with parenthesized options.

    Returns:
        An `exp.GeneratedAsRowColumnConstraint` for the `ROW` form, otherwise the
        `exp.GeneratedAsIdentityColumnConstraint` with the parsed options set on it.
    """
    if self._match_text_seq("BY", "DEFAULT"):
        on_null = self._match_pair(TokenType.ON, TokenType.NULL)
        this = self.expression(
            exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
        )
    else:
        self._match_text_seq("ALWAYS")
        this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

    self._match(TokenType.ALIAS)

    if self._match_text_seq("ROW"):
        start = self._match_text_seq("START")
        if not start:
            self._match(TokenType.END)
        hidden = self._match_text_seq("HIDDEN")
        return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

    identity = self._match_text_seq("IDENTITY")

    if not self._match(TokenType.L_PAREN):
        return this

    if self._match(TokenType.START_WITH):
        this.set("start", self._parse_bitwise())
    if self._match_text_seq("INCREMENT", "BY"):
        this.set("increment", self._parse_bitwise())
    for keyword in ("MINVALUE", "MAXVALUE"):
        if self._match_text_seq(keyword):
            this.set(keyword.lower(), self._parse_bitwise())

    if self._match_text_seq("CYCLE"):
        this.set("cycle", True)
    elif self._match_text_seq("NO", "CYCLE"):
        this.set("cycle", False)

    if not identity:
        this.set("expression", self._parse_range())
    elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
        # Positional form: the first value is the start, the second one the increment
        values = self._parse_csv(self._parse_bitwise)
        this.set("start", seq_get(values, 0))
        this.set("increment", seq_get(values, 1))

    self._match_r_paren()

    return this

def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
    """Parse an optional `LENGTH` keyword and the value of an inline length constraint."""
    self._match_text_seq("LENGTH")
    length = self._parse_bitwise()
    return self.expression(exp.InlineLengthColumnConstraint, this=length)
def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
    """
    Parse the constraint that follows an already consumed `NOT`.

    Returns:
        The constraint for `NULL`, `CASESPECIFIC` or `FOR REPLICATION`; otherwise None,
        after stepping back one token.
    """
    candidates = (
        (("NULL",), exp.NotNullColumnConstraint, {}),
        (("CASESPECIFIC",), exp.CaseSpecificColumnConstraint, {"not_": True}),
        (("FOR", "REPLICATION"), exp.NotForReplicationColumnConstraint, {}),
    )

    for texts, constraint_class, kwargs in candidates:
        if self._match_text_seq(*texts):
            return self.expression(constraint_class, **kwargs)

    # Unconsume the `NOT` token
    self._retreat(self._index - 1)
    return None

def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
    """
    Parse one column constraint, optionally named through `CONSTRAINT name`.

    Returns:
        An `exp.ColumnConstraint` if a text registered in `CONSTRAINT_PARSERS` follows (and
        it is not a `WITH` that introduces a procedure option); otherwise the result of
        parsing the optional constraint name.
    """
    this = self._match(TokenType.CONSTRAINT) and self._parse_id_var()

    procedure_option_follows = (
        self._match(TokenType.WITH, advance=False)
        and self._next
        and self._next.text.upper() in self.PROCEDURE_OPTIONS
    )

    if procedure_option_follows or not self._match_texts(self.CONSTRAINT_PARSERS):
        return this

    constraint_parser = self.CONSTRAINT_PARSERS[self._prev.text.upper()]
    return self.expression(exp.ColumnConstraint, this=this, kind=constraint_parser(self))

def _parse_constraint(self) -> t.Optional[exp.Expression]:
    """
    Parse a schema-level constraint.

    Returns:
        An `exp.Constraint` for the `CONSTRAINT name ...` form, otherwise the result of
        parsing an unnamed constraint restricted to `SCHEMA_UNNAMED_CONSTRAINTS`.
    """
    if self._match(TokenType.CONSTRAINT):
        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
    """Parse unnamed constraints (or function calls) until neither can be parsed."""
    constraints = []

    constraint = self._parse_unnamed_constraint() or self._parse_function()
    while constraint:
        constraints.append(constraint)
        constraint = self._parse_unnamed_constraint() or self._parse_function()

    return constraints

def _parse_unnamed_constraint(
    self, constraints: t.Optional[t.Collection[str]] = None
) -> t.Optional[exp.Expression]:
    """
    Parse a single constraint that is not introduced by `CONSTRAINT name`.

    Args:
        constraints: The texts that may start the constraint. Defaults to the keys of
            `CONSTRAINT_PARSERS`.

    Returns:
        The parsed constraint, or None if the current token is an identifier or matches
        none of the allowed texts.
    """
    if self._match(TokenType.IDENTIFIER, advance=False):
        return None
    if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
        return None

    constraint = self._prev.text.upper()
    if constraint not in self.CONSTRAINT_PARSERS:
        self.raise_error(f"No parser found for schema constraint {constraint}.")

    return self.CONSTRAINT_PARSERS[constraint](self)

def _parse_unique_key(self) -> t.Optional[exp.Expression]:
    """Parse the optional name of a unique key as an identifier (`any_token=False`)."""
    return self._parse_id_var(any_token=False)
def _parse_unique(self) -> exp.UniqueColumnConstraint:
    """
    Parse the remainder of a unique constraint.

    The parts are consumed in this order: optional `KEY`, optional `NULLS NOT DISTINCT`,
    optional key name with a schema, optional `USING` followed by the index type, and an
    optional conflict clause.
    """
    self._match_text_seq("KEY")

    nulls = self._match_text_seq("NULLS", "NOT", "DISTINCT")
    this = self._parse_schema(self._parse_unique_key())
    index_type = self._match(TokenType.USING) and self._advance_any() and self._prev.text
    on_conflict = self._parse_on_conflict()

    return self.expression(
        exp.UniqueColumnConstraint,
        nulls=nulls,
        this=this,
        index_type=index_type,
        on_conflict=on_conflict,
    )

def _parse_key_constraint_options(self) -> t.List[str]:
    """
    Parse the options trailing a key constraint.

    Returns:
        One string per option: `ON <event> <action>` for the `ON` clauses, and the name of
        every other option recognized through `KEY_CONSTRAINT_OPTIONS`.
    """
    options = []

    while self._curr:
        if not self._match(TokenType.ON):
            var = self._parse_var_from_options(self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False)
            if not var:
                break
            options.append(var.name)
            continue

        action = None
        on = self._advance_any() and self._prev.text

        if self._match_text_seq("NO", "ACTION"):
            action = "NO ACTION"
        elif self._match_text_seq("CASCADE"):
            action = "CASCADE"
        elif self._match_text_seq("RESTRICT"):
            action = "RESTRICT"
        elif self._match_pair(TokenType.SET, TokenType.NULL):
            action = "SET NULL"
        elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
            action = "SET DEFAULT"
        else:
            self.raise_error("Invalid key constraint")

        options.append(f"ON {on} {action}")

    return options

def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
    """
    Parse a `REFERENCES` clause.

    Args:
        match: If True, the `REFERENCES` token itself has to be matched first.

    Returns:
        An `exp.Reference`, or None if `match` is True and `REFERENCES` does not follow.
    """
    if match and not self._match(TokenType.REFERENCES):
        return None

    this = self._parse_table(schema=True)
    options = self._parse_key_constraint_options()
    return self.expression(exp.Reference, this=this, expressions=None, options=options)
def _parse_foreign_key(self) -> exp.ForeignKey:
    """
    Parse the columns, the reference and the `ON DELETE` / `ON UPDATE` actions of a foreign key.

    Returns:
        An `exp.ForeignKey` in which each action is stored under the lower-cased event name.
    """
    columns = self._parse_wrapped_id_vars()
    reference = self._parse_references()
    actions = {}

    while self._match(TokenType.ON):
        if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
            self.raise_error("Expected DELETE or UPDATE")

        event = self._prev.text.lower()

        if self._match_text_seq("NO", "ACTION"):
            action = "NO ACTION"
        elif self._match(TokenType.SET):
            self._match_set((TokenType.NULL, TokenType.DEFAULT))
            action = "SET " + self._prev.text.upper()
        else:
            # Any other single token is taken verbatim as the action
            self._advance()
            action = self._prev.text.upper()

        actions[event] = action

    return self.expression(
        exp.ForeignKey,
        expressions=columns,
        reference=reference,
        **actions,  # type: ignore
    )

def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
    """Parse one part of a primary key: an ordered expression or, failing that, a field."""
    return self._parse_ordered() or self._parse_field()

def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
    """
    Parse the two wrapped identifiers of a period-for-system-time constraint.

    Returns:
        The constraint, or None (after stepping back one token) if the
        `TIMESTAMP_SNAPSHOT` token does not follow.
    """
    if self._match(TokenType.TIMESTAMP_SNAPSHOT):
        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    self._retreat(self._index - 1)
    return None

def _parse_primary_key(
    self, wrapped_optional: bool = False, in_props: bool = False
) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
    """
    Parse a primary key, either as a column constraint or as a key over a column list.

    Args:
        wrapped_optional: Forwarded as `optional` when the wrapped column list is parsed.
        in_props: If True, the column-list form is parsed even if no parenthesis follows.

    Returns:
        An `exp.PrimaryKeyColumnConstraint` for the column-level form, otherwise an
        `exp.PrimaryKey` with its columns and options.
    """
    desc = (
        self._match_set((TokenType.ASC, TokenType.DESC))
        and self._prev.token_type == TokenType.DESC
    )

    column_list_follows = in_props or self._match(TokenType.L_PAREN, advance=False)
    if not column_list_follows:
        return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

    columns = self._parse_wrapped_csv(self._parse_primary_key_part, optional=wrapped_optional)
    options = self._parse_key_constraint_options()
    return self.expression(exp.PrimaryKey, expressions=columns, options=options)

def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
    """
    Parse one entry inside brackets / braces: an explicitly aliased assignment, optionally
    followed by a slice.

    Args:
        is_map: Not used by this implementation.
    """
    entry = self._parse_alias(self._parse_assignment(), explicit=True)
    return self._parse_slice(entry)
def _parse_odbc_datetime_literal(self) -> exp.Expression:
    """
    Parse a datetime literal written in ODBC format.

    The literal is parsed into the expression type registered for its prefix in
    `ODBC_DATETIME_LITERALS`; for example `{d'yyyy-mm-dd'}` results in the same `Date`
    expression as `DATE('yyyy-mm-dd')`.

    Reference:
    https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals
    """
    self._match(TokenType.VAR)
    literal_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()]
    literal = self.expression(exp_class=literal_class, this=self._parse_string())

    if not self._match(TokenType.R_BRACE):
        self.raise_error("Expected }")

    return literal
def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
    """
    Parse a bracket or brace construct that follows `this`.

    Args:
        this: The expression preceding the bracket, if any.

    Returns:
        `this` unchanged if neither `[` nor `{` follows. Otherwise an ODBC datetime literal,
        a struct (braces), an array constructor, or an `exp.Bracket` subscript on `this`;
        further brackets are parsed recursively.
    """
    if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
        return this

    bracket_kind = self._prev.token_type
    is_brace = bracket_kind == TokenType.L_BRACE

    if (
        is_brace
        and self._curr
        and self._curr.token_type == TokenType.VAR
        and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS
    ):
        return self._parse_odbc_datetime_literal()

    expressions = self._parse_csv(lambda: self._parse_bracket_key_value(is_map=is_brace))

    # Only `[` and `{` can be matched above, so the expected closer follows from the opener
    if is_brace:
        closing, message = TokenType.R_BRACE, "Expected }"
    else:
        closing, message = TokenType.R_BRACKET, "Expected ]"
    if not self._match(closing):
        self.raise_error(message)

    # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
    if is_brace:
        this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
    elif not this:
        this = build_array_constructor(
            exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect
        )
    else:
        constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
        if constructor_type:
            return build_array_constructor(
                constructor_type,
                args=expressions,
                bracket_kind=bracket_kind,
                dialect=self.dialect,
            )

        expressions = apply_index_offset(
            this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect
        )
        this = self.expression(exp.Bracket, this=this, expressions=expressions)

    self._add_comments(this)
    return self._parse_bracket(this)

def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    """Wrap `this` in an `exp.Slice` if a colon follows, otherwise return it unchanged."""
    if not self._match(TokenType.COLON):
        return this

    return self.expression(exp.Slice, this=this, expression=self._parse_assignment())

def _parse_case(self) -> t.Optional[exp.Expression]:
    """
    Parse the body of a CASE expression up to and including its `END`.

    Returns:
        An `exp.Case` holding the optional operand, the WHEN / THEN branches and the
        optional ELSE value.
    """
    comments = self._prev_comments
    operand = self._parse_assignment()

    branches = []
    while self._match(TokenType.WHEN):
        condition = self._parse_assignment()
        self._match(TokenType.THEN)
        result = self._parse_assignment()
        branches.append(self.expression(exp.If, this=condition, true=result))

    default = self._parse_assignment() if self._match(TokenType.ELSE) else None

    if not self._match(TokenType.END):
        # `ELSE interval END` gets parsed as an interval whose operand is END, which means
        # the ELSE value is actually a column named "interval"
        if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
            default = exp.column("interval")
        else:
            self.raise_error("Expected END after CASE", self._prev)

    return self.expression(
        exp.Case, comments=comments, this=operand, ifs=branches, default=default
    )
def _parse_if(self) -> t.Optional[exp.Expression]:
    """
    Parse an IF, either as a function call `IF(...)` or as `IF cond THEN a [ELSE b] END`.

    Returns:
        An `exp.If`; a command if `NO_PAREN_IF_COMMANDS` is set and the unparenthesized IF
        is the very first token; or None (after retreating) if no condition can be parsed.
    """
    if self._match(TokenType.L_PAREN):
        args = self._parse_csv(lambda: self._parse_alias(self._parse_assignment(), explicit=True))
        this = self.validate_expression(exp.If.from_arg_list(args), args)
        self._match_r_paren()
        return this

    # Index of the already consumed IF token
    index = self._index - 1

    if self.NO_PAREN_IF_COMMANDS and index == 0:
        return self._parse_as_command(self._prev)

    condition = self._parse_assignment()

    if not condition:
        self._retreat(index)
        return None

    self._match(TokenType.THEN)
    true = self._parse_assignment()
    false = self._parse_assignment() if self._match(TokenType.ELSE) else None
    self._match(TokenType.END)
    return self.expression(exp.If, this=condition, true=true, false=false)

def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
    """
    Parse `VALUE FOR <column> [OVER (<order>)]` following an already consumed token.

    Returns:
        An `exp.NextValueFor`, or None (after stepping back one token) if `VALUE FOR` does
        not follow.
    """
    if self._match_text_seq("VALUE", "FOR"):
        this = self._parse_column()
        order = self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order)
        return self.expression(exp.NextValueFor, this=this, order=order)

    self._retreat(self._index - 1)
    return None

def _parse_extract(self) -> exp.Extract | exp.Anonymous:
    """
    Parse the arguments of EXTRACT, which are separated by either `FROM` or a comma.

    Returns:
        An `exp.Extract`; an error is reported if neither separator follows the first part.
    """
    this = self._parse_function() or self._parse_var_or_string(upper=True)

    if not (self._match(TokenType.FROM) or self._match(TokenType.COMMA)):
        self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

    return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

def _parse_gap_fill(self) -> exp.GapFill:
    """
    Parse the arguments of GAP_FILL: an optional `TABLE` keyword, a table, and further
    comma-separated arguments.

    Returns:
        The validated `exp.GapFill` built from the table followed by the other arguments.
    """
    self._match(TokenType.TABLE)
    table = self._parse_table()

    self._match(TokenType.COMMA)
    args = [table]
    args.extend(self._parse_csv(self._parse_lambda))

    return self.validate_expression(exp.GapFill.from_arg_list(args), args)
def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
    """
    Parse the arguments of a CAST-like function.

    Args:
        strict: If True an `exp.Cast` is built, otherwise an `exp.TryCast`.
        safe: Stored on the resulting expression.

    Returns:
        - `exp.CastToStrType` for the `CAST(x, 'type')` form,
        - `exp.StrToDate` / `exp.StrToTime` if a format is given and the target type is temporal,
        - the `exp.Cast` / `exp.TryCast` otherwise.
    """
    this = self._parse_assignment()

    if not self._match(TokenType.ALIAS):
        if self._match(TokenType.COMMA):
            return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

        self.raise_error("Expected AS after CAST")

    fmt = None
    to = self._parse_types()

    default = self._match(TokenType.DEFAULT)
    if default:
        default = self._parse_bitwise()
        self._match_text_seq("ON", "CONVERSION", "ERROR")

    if self._match_set((TokenType.FORMAT, TokenType.COMMA)):
        fmt_string = self._parse_string()
        fmt = self._parse_at_time_zone(fmt_string)

        if not to:
            to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
        if to.this in exp.DataType.TEMPORAL_TYPES:
            # A formatted cast to a temporal type is a string-to-date/time conversion
            conversion = exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime
            mapping = self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING
            trie = self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE
            time_format = format_time(fmt_string.this if fmt_string else "", mapping, trie)

            this = self.expression(
                conversion,
                this=this,
                format=exp.Literal.string(time_format),
                safe=safe,
            )

            if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                this.set("zone", fmt.args["zone"])
            return this
    elif not to:
        self.raise_error("Expected TYPE after CAST")
    elif isinstance(to, exp.Identifier):
        to = exp.DataType.build(to.name, udt=True)
    elif to.this == exp.DataType.Type.CHAR:
        if self._match(TokenType.CHARACTER_SET):
            to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

    cast_class = exp.Cast if strict else exp.TryCast
    return self.expression(
        cast_class,
        this=this,
        to=to,
        format=fmt,
        safe=safe,
        action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
        default=default,
    )
def _parse_string_agg(self) -> exp.GroupConcat:
    """
    Parse the arguments of STRING_AGG / LISTAGG style functions into an `exp.GroupConcat`.

    Returns:
        The `exp.GroupConcat`. An ORDER BY, given either inside the call or through a
        trailing `WITHIN GROUP (ORDER BY ...)`, is stored on its `this` argument.
    """
    if self._match(TokenType.DISTINCT):
        args: t.List[t.Optional[exp.Expression]] = [
            self.expression(exp.Distinct, expressions=[self._parse_assignment()])
        ]
        if self._match(TokenType.COMMA):
            args.extend(self._parse_csv(self._parse_assignment))
    else:
        args = self._parse_csv(self._parse_assignment)  # type: ignore

    on_overflow: t.Optional[exp.Expression] = None
    if self._match_text_seq("ON", "OVERFLOW"):
        # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior])
        if self._match_text_seq("ERROR"):
            on_overflow = exp.var("ERROR")
        else:
            self._match_text_seq("TRUNCATE")
            filler = self._parse_string()
            with_count = self._match_text_seq("WITH", "COUNT") or not self._match_text_seq(
                "WITHOUT", "COUNT"
            )
            on_overflow = self.expression(
                exp.OverflowTruncateBehavior, this=filler, with_count=with_count
            )

    index = self._index
    if not self._match(TokenType.R_PAREN) and args:
        # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
        # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
        # The order is parsed through `this` as a canonicalization for WITHIN GROUPs
        args[0] = self._parse_limit(this=self._parse_order(this=args[0]))
        return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

    # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY order_by_expression_list [ASC | DESC]).
    # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
    # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
    if not self._match_text_seq("WITHIN", "GROUP"):
        self._retreat(index)
        return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

    # The corresponding match_r_paren will be called in parse_function (caller)
    self._match_l_paren()

    ordered = self._parse_order(this=seq_get(args, 0))
    return self.expression(
        exp.GroupConcat,
        this=ordered,
        separator=seq_get(args, 1),
        on_overflow=on_overflow,
    )

def _parse_convert(
    self, strict: bool, safe: t.Optional[bool] = None
) -> t.Optional[exp.Expression]:
    """
    Parse the arguments of CONVERT: an expression followed by `USING charset` or `, type`.

    Args:
        strict: If True an `exp.Cast` is built, otherwise an `exp.TryCast`.
        safe: Stored on the resulting expression.
    """
    this = self._parse_bitwise()

    to: t.Optional[exp.Expression] = None
    if self._match(TokenType.USING):
        to = self.expression(exp.CharacterSet, this=self._parse_var())
    elif self._match(TokenType.COMMA):
        to = self._parse_types()

    cast_class = exp.Cast if strict else exp.TryCast
    return self.expression(cast_class, this=this, to=to, safe=safe)

def _parse_xml_table(self) -> exp.XMLTable:
    """
    Parse the arguments of XMLTABLE.

    The parts are consumed in this order: optional `XMLNAMESPACES(...)`, the row
    expression string, optional `PASSING [BY VALUE] ...`, optional
    `RETURNING SEQUENCE BY REF`, and optional `COLUMNS ...`.
    """
    namespaces = None
    if self._match_text_seq("XMLNAMESPACES", "("):
        namespaces = self._parse_xml_namespace()
        self._match_text_seq(")", ",")

    this = self._parse_string()

    passing = None
    if self._match_text_seq("PASSING"):
        # The BY VALUE keywords are optional and are provided for semantic clarity
        self._match_text_seq("BY", "VALUE")
        passing = self._parse_csv(self._parse_column)

    by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF")

    columns = None
    if self._match_text_seq("COLUMNS"):
        columns = self._parse_csv(self._parse_field_def)

    return self.expression(
        exp.XMLTable,
        this=this,
        namespaces=namespaces,
        passing=passing,
        columns=columns,
        by_ref=by_ref,
    )
def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
    """
    DECODE generally comes in two variants:

    - DECODE(bin, charset)
    - DECODE(expression, search, result [, search, result] ... [, default])

    With fewer than three arguments an `exp.Decode` node is built. Otherwise the call
    is rewritten as a CASE expression: one branch per (search, result) pair, with a
    trailing unpaired argument used as the default. NULL needs special treatment,
    because it has to be tested explicitly with `IS NULL` instead of by equality.

    Returns `None` if the first argument, or any paired search / result, is falsy.
    """
    args = self._parse_csv(self._parse_assignment)

    if len(args) < 3:
        return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

    subject = args[0]
    rest = args[1:]
    if not subject:
        return None

    branches = []
    # Walk the (search, result) pairs; a trailing unpaired item is handled after the loop
    for pos in range(0, len(rest) - 1, 2):
        search = rest[pos]
        result = rest[pos + 1]
        if not search or not result:
            return None

        if isinstance(search, exp.Literal):
            # Literal search value: plain equality
            condition = exp.EQ(this=subject.copy(), expression=search)
        elif isinstance(search, exp.Null):
            condition = exp.Is(this=subject.copy(), expression=exp.Null())
        else:
            # Any other expression: match on equality, or when both sides are NULL
            equal = exp.EQ(this=subject.copy(), expression=search)
            both_null = exp.and_(
                exp.Is(this=subject.copy(), expression=exp.Null()),
                exp.Is(this=search.copy(), expression=exp.Null()),
                copy=False,
            )
            condition = exp.or_(equal, both_null, copy=False)

        branches.append(exp.If(this=condition, true=result))

    default = rest[-1] if len(rest) % 2 == 1 else None
    return exp.Case(ifs=branches, default=default)
6476 self._match_text_seq("VALUE") 6477 value = self._parse_bitwise() 6478 6479 if not key and not value: 6480 return None 6481 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6482 6483 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6484 if not this or not self._match_text_seq("FORMAT", "JSON"): 6485 return this 6486 6487 return self.expression(exp.FormatJson, this=this) 6488 6489 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6490 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6491 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6492 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6493 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6494 else: 6495 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6496 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6497 6498 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6499 6500 if not empty and not error and not null: 6501 return None 6502 6503 return self.expression( 6504 exp.OnCondition, 6505 empty=empty, 6506 error=error, 6507 null=null, 6508 ) 6509 6510 def _parse_on_handling( 6511 self, on: str, *values: str 6512 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6513 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6514 for value in values: 6515 if self._match_text_seq(value, "ON", on): 6516 return f"{value} ON {on}" 6517 6518 index = self._index 6519 if self._match(TokenType.DEFAULT): 6520 default_value = self._parse_bitwise() 6521 if self._match_text_seq("ON", on): 6522 return default_value 6523 6524 self._retreat(index) 6525 6526 return None 6527 6528 @t.overload 6529 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6530 6531 @t.overload 6532 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
def _parse_json_column_def(self) -> exp.JSONColumnDef:
    """Parse one column definition of a JSON schema.

    Note: this is currently incomplete; it only implements the "JSON_value_column" part.

    A definition that starts with NESTED has no name or type and carries a nested
    schema instead; any other definition is an identifier followed by a type.
    Either form may then give PATH followed by a string.
    """
    column = None
    kind = None

    is_nested = self._match_text_seq("NESTED")
    if not is_nested:
        column = self._parse_id_var()
        kind = self._parse_types(allow_identifiers=False)

    path = self._match_text_seq("PATH") and self._parse_string()
    nested_schema = self._parse_json_schema() if is_nested else None

    return self.expression(
        exp.JSONColumnDef,
        this=column,
        kind=kind,
        path=path,
        nested_schema=nested_schema,
    )
def _parse_match_against(self) -> exp.MatchAgainst:
    """Parse the column list, search string and optional modifier of MATCH ... AGAINST.

    The columns are read first, then the `) AGAINST (` token sequence is matched,
    then the search string. The modifier is one of the search-mode strings below,
    or `None` when no mode keywords follow.
    """
    columns = self._parse_csv(self._parse_column)

    self._match_text_seq(")", "AGAINST", "(")

    search = self._parse_string()

    modifier = None
    if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
        with_expansion = self._match_text_seq("WITH", "QUERY", "EXPANSION")
        modifier = (
            "IN NATURAL LANGUAGE MODE WITH QUERY EXPANSION"
            if with_expansion
            else "IN NATURAL LANGUAGE MODE"
        )
    elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
        modifier = "IN BOOLEAN MODE"
    elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
        modifier = "WITH QUERY EXPANSION"

    return self.expression(
        exp.MatchAgainst, this=search, expressions=columns, modifier=modifier
    )
def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
    """Parse the arguments of a string-position function.

    Two shapes are handled: `<needle> IN <haystack>` and a comma-separated argument
    list whose optional third item is the start position.

    Args:
        haystack_first: For the comma-separated shape, whether the first argument
            is the string being searched instead of the substring.
    """
    args = self._parse_csv(self._parse_bitwise)

    if self._match(TokenType.IN):
        haystack = self._parse_bitwise()
        return self.expression(exp.StrPosition, this=haystack, substr=seq_get(args, 0))

    haystack_idx, needle_idx = (0, 1) if haystack_first else (1, 0)

    return self.expression(
        exp.StrPosition,
        this=seq_get(args, haystack_idx),
        substr=seq_get(args, needle_idx),
        position=seq_get(args, 2),
    )
def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    """Wrap `this` in `exp.HavingMax` when a HAVING token follows.

    Without a HAVING token, `this` is returned untouched. Otherwise MAX or MIN is
    matched, and the node's `max` flag is false only when the last consumed token's
    text is MIN.
    """
    if not self._match(TokenType.HAVING):
        return this

    self._match_texts(("MAX", "MIN"))
    is_max = self._prev.text.upper() != "MIN"
    return self.expression(
        exp.HavingMax, this=this, expression=self._parse_column(), max=is_max
    )
6763 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6764 if self._match_text_seq("WITHIN", "GROUP"): 6765 order = self._parse_wrapped(self._parse_order) 6766 this = self.expression(exp.WithinGroup, this=this, expression=order) 6767 6768 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6769 self._match(TokenType.WHERE) 6770 this = self.expression( 6771 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6772 ) 6773 self._match_r_paren() 6774 6775 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6776 # Some dialects choose to implement and some do not. 6777 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6778 6779 # There is some code above in _parse_lambda that handles 6780 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6781 6782 # The below changes handle 6783 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6784 6785 # Oracle allows both formats 6786 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6787 # and Snowflake chose to do the same for familiarity 6788 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6789 if isinstance(this, exp.AggFunc): 6790 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6791 6792 if ignore_respect and ignore_respect is not this: 6793 ignore_respect.replace(ignore_respect.this) 6794 this = self.expression(ignore_respect.__class__, this=this) 6795 6796 this = self._parse_respect_or_ignore_nulls(this) 6797 6798 # bigquery select from window x AS (partition by ...) 
6799 if alias: 6800 over = None 6801 self._match(TokenType.ALIAS) 6802 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6803 return this 6804 else: 6805 over = self._prev.text.upper() 6806 6807 if comments and isinstance(func, exp.Expression): 6808 func.pop_comments() 6809 6810 if not self._match(TokenType.L_PAREN): 6811 return self.expression( 6812 exp.Window, 6813 comments=comments, 6814 this=this, 6815 alias=self._parse_id_var(False), 6816 over=over, 6817 ) 6818 6819 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6820 6821 first = self._match(TokenType.FIRST) 6822 if self._match_text_seq("LAST"): 6823 first = False 6824 6825 partition, order = self._parse_partition_and_order() 6826 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6827 6828 if kind: 6829 self._match(TokenType.BETWEEN) 6830 start = self._parse_window_spec() 6831 self._match(TokenType.AND) 6832 end = self._parse_window_spec() 6833 6834 spec = self.expression( 6835 exp.WindowSpec, 6836 kind=kind, 6837 start=start["value"], 6838 start_side=start["side"], 6839 end=end["value"], 6840 end_side=end["side"], 6841 ) 6842 else: 6843 spec = None 6844 6845 self._match_r_paren() 6846 6847 window = self.expression( 6848 exp.Window, 6849 comments=comments, 6850 this=this, 6851 partition_by=partition, 6852 order=order, 6853 spec=spec, 6854 alias=window_alias, 6855 over=over, 6856 first=first, 6857 ) 6858 6859 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
    """Parse one boundary of a window frame.

    Returns:
        A dict with two keys. "value" is "UNBOUNDED", "CURRENT ROW", or whatever
        `_parse_bitwise` returns. "side" is the text of the matched `WINDOW_SIDES`
        token, or the falsy match result when none is present.
    """
    self._match(TokenType.BETWEEN)

    if self._match_text_seq("UNBOUNDED"):
        value = "UNBOUNDED"
    elif self._match_text_seq("CURRENT", "ROW"):
        value = "CURRENT ROW"
    else:
        value = self._parse_bitwise()

    # Keep the falsy match result as-is when no side keyword follows
    side = self._match_texts(self.WINDOW_SIDES)
    if side:
        side = self._prev.text

    return {"value": value, "side": side}
column and column.comments: 6918 this.comments = column.pop_comments() 6919 6920 return this 6921 6922 def _parse_id_var( 6923 self, 6924 any_token: bool = True, 6925 tokens: t.Optional[t.Collection[TokenType]] = None, 6926 ) -> t.Optional[exp.Expression]: 6927 expression = self._parse_identifier() 6928 if not expression and ( 6929 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6930 ): 6931 quoted = self._prev.token_type == TokenType.STRING 6932 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6933 6934 return expression 6935 6936 def _parse_string(self) -> t.Optional[exp.Expression]: 6937 if self._match_set(self.STRING_PARSERS): 6938 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6939 return self._parse_placeholder() 6940 6941 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6942 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6943 6944 def _parse_number(self) -> t.Optional[exp.Expression]: 6945 if self._match_set(self.NUMERIC_PARSERS): 6946 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6947 return self._parse_placeholder() 6948 6949 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6950 if self._match(TokenType.IDENTIFIER): 6951 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6952 return self._parse_placeholder() 6953 6954 def _parse_var( 6955 self, 6956 any_token: bool = False, 6957 tokens: t.Optional[t.Collection[TokenType]] = None, 6958 upper: bool = False, 6959 ) -> t.Optional[exp.Expression]: 6960 if ( 6961 (any_token and self._advance_any()) 6962 or self._match(TokenType.VAR) 6963 or (self._match_set(tokens) if tokens else False) 6964 ): 6965 return self.expression( 6966 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6967 ) 6968 return self._parse_placeholder() 6969 6970 def _advance_any(self, ignore_reserved: bool = False) -> 
t.Optional[Token]: 6971 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6972 self._advance() 6973 return self._prev 6974 return None 6975 6976 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6977 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6978 6979 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6980 return self._parse_primary() or self._parse_var(any_token=True) 6981 6982 def _parse_null(self) -> t.Optional[exp.Expression]: 6983 if self._match_set(self.NULL_TOKENS): 6984 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6985 return self._parse_placeholder() 6986 6987 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6988 if self._match(TokenType.TRUE): 6989 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6990 if self._match(TokenType.FALSE): 6991 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6992 return self._parse_placeholder() 6993 6994 def _parse_star(self) -> t.Optional[exp.Expression]: 6995 if self._match(TokenType.STAR): 6996 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6997 return self._parse_placeholder() 6998 6999 def _parse_parameter(self) -> exp.Parameter: 7000 this = self._parse_identifier() or self._parse_primary_or_var() 7001 return self.expression(exp.Parameter, this=this) 7002 7003 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7004 if self._match_set(self.PLACEHOLDER_PARSERS): 7005 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7006 if placeholder: 7007 return placeholder 7008 self._advance(-1) 7009 return None 7010 7011 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7012 if not self._match_texts(keywords): 7013 return None 7014 if self._match(TokenType.L_PAREN, advance=False): 7015 return self._parse_wrapped_csv(self._parse_expression) 7016 7017 expression = self._parse_expression() 7018 
return [expression] if expression else None 7019 7020 def _parse_csv( 7021 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7022 ) -> t.List[exp.Expression]: 7023 parse_result = parse_method() 7024 items = [parse_result] if parse_result is not None else [] 7025 7026 while self._match(sep): 7027 self._add_comments(parse_result) 7028 parse_result = parse_method() 7029 if parse_result is not None: 7030 items.append(parse_result) 7031 7032 return items 7033 7034 def _parse_tokens( 7035 self, parse_method: t.Callable, expressions: t.Dict 7036 ) -> t.Optional[exp.Expression]: 7037 this = parse_method() 7038 7039 while self._match_set(expressions): 7040 this = self.expression( 7041 expressions[self._prev.token_type], 7042 this=this, 7043 comments=self._prev_comments, 7044 expression=parse_method(), 7045 ) 7046 7047 return this 7048 7049 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7050 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7051 7052 def _parse_wrapped_csv( 7053 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 7054 ) -> t.List[exp.Expression]: 7055 return self._parse_wrapped( 7056 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7057 ) 7058 7059 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7060 wrapped = self._match(TokenType.L_PAREN) 7061 if not wrapped and not optional: 7062 self.raise_error("Expecting (") 7063 parse_result = parse_method() 7064 if wrapped: 7065 self._match_r_paren() 7066 return parse_result 7067 7068 def _parse_expressions(self) -> t.List[exp.Expression]: 7069 return self._parse_csv(self._parse_expression) 7070 7071 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7072 return self._parse_select() or self._parse_set_operations( 7073 self._parse_alias(self._parse_assignment(), explicit=True) 7074 if alias 7075 else 
def _parse_transaction(self) -> exp.Transaction | exp.Command:
    """Parse the remainder of a statement that starts a transaction.

    An optional kind (one of `TRANSACTION_KIND`) and an optional TRANSACTION / WORK
    keyword are read first. What follows is a comma-separated list of modes, each
    mode being the space-joined texts of a run of consecutive VAR tokens.
    """
    kind = self._prev.text if self._match_texts(self.TRANSACTION_KIND) else None

    self._match_texts(("TRANSACTION", "WORK"))

    modes = []
    more = True
    while more:
        words = []
        while self._match(TokenType.VAR):
            words.append(self._prev.text)

        # An empty run between commas contributes no mode
        if words:
            modes.append(" ".join(words))

        more = self._match(TokenType.COMMA)

    return self.expression(exp.Transaction, this=kind, modes=modes)
7139 if self._match_texts(("FIRST", "AFTER")): 7140 position = self._prev.text 7141 column_position = self.expression( 7142 exp.ColumnPosition, this=self._parse_column(), position=position 7143 ) 7144 expression.set("position", column_position) 7145 7146 return expression 7147 7148 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7149 drop = self._match(TokenType.DROP) and self._parse_drop() 7150 if drop and not isinstance(drop, exp.Command): 7151 drop.set("kind", drop.args.get("kind", "COLUMN")) 7152 return drop 7153 7154 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7155 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7156 return self.expression( 7157 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7158 ) 7159 7160 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7161 index = self._index - 1 7162 7163 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7164 return self._parse_csv( 7165 lambda: self.expression( 7166 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7167 ) 7168 ) 7169 7170 self._retreat(index) 7171 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 7172 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 7173 7174 if self._match_text_seq("ADD", "COLUMNS"): 7175 schema = self._parse_schema() 7176 if schema: 7177 return [schema] 7178 return [] 7179 7180 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 7181 7182 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7183 if self._match_texts(self.ALTER_ALTER_PARSERS): 7184 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7185 7186 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7187 # keyword after ALTER we default to parsing this statement 7188 self._match(TokenType.COLUMN) 7189 column = 
def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
    """Parse the value of an ALTER ... DISTSTYLE action.

    ALL, EVEN and AUTO become an upper-cased var. Anything else is treated as the
    KEY DISTKEY form, whose value is parsed as a column.
    """
    if self._match_texts(("ALL", "EVEN", "AUTO")):
        style = exp.var(self._prev.text.upper())
    else:
        self._match_text_seq("KEY", "DISTKEY")
        style = self._parse_column()

    return self.expression(exp.AlterDistStyle, this=style)
def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]:
    """Parse the RENAME action of an ALTER statement.

    Without a COLUMN token this is a table rename: an optional TO followed by the
    new table. With it, the shape is `[exists] <old column> TO <new column>`, and
    `None` is returned when either column or the TO keyword comes back as `None`.
    """
    if not self._match(TokenType.COLUMN):
        self._match_text_seq("TO")
        return self.expression(exp.AlterRename, this=self._parse_table(schema=True))

    exists = self._parse_exists()
    old_column = self._parse_column()
    to = self._match_text_seq("TO")
    new_column = self._parse_column()

    if any(part is None for part in (old_column, to, new_column)):
        return None

    return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)
self._match_text_seq("ACCESS", "METHOD"): 7288 alter_set.set("access_method", self._parse_field()) 7289 elif self._match_text_seq("TABLESPACE"): 7290 alter_set.set("tablespace", self._parse_field()) 7291 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7292 alter_set.set("file_format", [self._parse_field()]) 7293 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7294 alter_set.set("file_format", self._parse_wrapped_options()) 7295 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7296 alter_set.set("copy_options", self._parse_wrapped_options()) 7297 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7298 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7299 else: 7300 if self._match_text_seq("SERDE"): 7301 alter_set.set("serde", self._parse_field()) 7302 7303 alter_set.set("expressions", [self._parse_properties()]) 7304 7305 return alter_set 7306 7307 def _parse_alter(self) -> exp.Alter | exp.Command: 7308 start = self._prev 7309 7310 alter_token = self._match_set(self.ALTERABLES) and self._prev 7311 if not alter_token: 7312 return self._parse_as_command(start) 7313 7314 exists = self._parse_exists() 7315 only = self._match_text_seq("ONLY") 7316 this = self._parse_table(schema=True) 7317 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7318 7319 if self._next: 7320 self._advance() 7321 7322 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7323 if parser: 7324 actions = ensure_list(parser(self)) 7325 not_valid = self._match_text_seq("NOT", "VALID") 7326 options = self._parse_csv(self._parse_property) 7327 7328 if not self._curr and actions: 7329 return self.expression( 7330 exp.Alter, 7331 this=this, 7332 kind=alter_token.text.upper(), 7333 exists=exists, 7334 actions=actions, 7335 only=only, 7336 options=options, 7337 cluster=cluster, 7338 not_valid=not_valid, 7339 ) 7340 7341 return self._parse_as_command(start) 7342 7343 def _parse_analyze(self) -> 
exp.Analyze | exp.Command: 7344 start = self._prev 7345 # https://duckdb.org/docs/sql/statements/analyze 7346 if not self._curr: 7347 return self.expression(exp.Analyze) 7348 7349 options = [] 7350 while self._match_texts(self.ANALYZE_STYLES): 7351 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7352 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7353 else: 7354 options.append(self._prev.text.upper()) 7355 7356 this: t.Optional[exp.Expression] = None 7357 inner_expression: t.Optional[exp.Expression] = None 7358 7359 kind = self._curr and self._curr.text.upper() 7360 7361 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7362 this = self._parse_table_parts() 7363 elif self._match_text_seq("TABLES"): 7364 if self._match_set((TokenType.FROM, TokenType.IN)): 7365 kind = f"{kind} {self._prev.text.upper()}" 7366 this = self._parse_table(schema=True, is_db_reference=True) 7367 elif self._match_text_seq("DATABASE"): 7368 this = self._parse_table(schema=True, is_db_reference=True) 7369 elif self._match_text_seq("CLUSTER"): 7370 this = self._parse_table() 7371 # Try matching inner expr keywords before fallback to parse table. 
7372 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7373 kind = None 7374 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7375 else: 7376 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7377 kind = None 7378 this = self._parse_table_parts() 7379 7380 partition = self._try_parse(self._parse_partition) 7381 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7382 return self._parse_as_command(start) 7383 7384 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7385 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7386 "WITH", "ASYNC", "MODE" 7387 ): 7388 mode = f"WITH {self._tokens[self._index-2].text.upper()} MODE" 7389 else: 7390 mode = None 7391 7392 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7393 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7394 7395 properties = self._parse_properties() 7396 return self.expression( 7397 exp.Analyze, 7398 kind=kind, 7399 this=this, 7400 mode=mode, 7401 partition=partition, 7402 properties=properties, 7403 expression=inner_expression, 7404 options=options, 7405 ) 7406 7407 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7408 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7409 this = None 7410 kind = self._prev.text.upper() 7411 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7412 expressions = [] 7413 7414 if not self._match_text_seq("STATISTICS"): 7415 self.raise_error("Expecting token STATISTICS") 7416 7417 if self._match_text_seq("NOSCAN"): 7418 this = "NOSCAN" 7419 elif self._match(TokenType.FOR): 7420 if self._match_text_seq("ALL", "COLUMNS"): 7421 this = "FOR ALL COLUMNS" 7422 if self._match_texts("COLUMNS"): 7423 this = "FOR COLUMNS" 7424 expressions = self._parse_csv(self._parse_column_reference) 7425 elif self._match_text_seq("SAMPLE"): 7426 sample = self._parse_number() 7427 
expressions = [ 7428 self.expression( 7429 exp.AnalyzeSample, 7430 sample=sample, 7431 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7432 ) 7433 ] 7434 7435 return self.expression( 7436 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7437 ) 7438 7439 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7440 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7441 kind = None 7442 this = None 7443 expression: t.Optional[exp.Expression] = None 7444 if self._match_text_seq("REF", "UPDATE"): 7445 kind = "REF" 7446 this = "UPDATE" 7447 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7448 this = "UPDATE SET DANGLING TO NULL" 7449 elif self._match_text_seq("STRUCTURE"): 7450 kind = "STRUCTURE" 7451 if self._match_text_seq("CASCADE", "FAST"): 7452 this = "CASCADE FAST" 7453 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7454 ("ONLINE", "OFFLINE") 7455 ): 7456 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7457 expression = self._parse_into() 7458 7459 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7460 7461 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7462 this = self._prev.text.upper() 7463 if self._match_text_seq("COLUMNS"): 7464 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7465 return None 7466 7467 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7468 kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7469 if self._match_text_seq("STATISTICS"): 7470 return self.expression(exp.AnalyzeDelete, kind=kind) 7471 return None 7472 7473 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7474 if self._match_text_seq("CHAINED", "ROWS"): 7475 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7476 return None 7477 7478 # 
https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7479 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7480 this = self._prev.text.upper() 7481 expression: t.Optional[exp.Expression] = None 7482 expressions = [] 7483 update_options = None 7484 7485 if self._match_text_seq("HISTOGRAM", "ON"): 7486 expressions = self._parse_csv(self._parse_column_reference) 7487 with_expressions = [] 7488 while self._match(TokenType.WITH): 7489 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7490 if self._match_texts(("SYNC", "ASYNC")): 7491 if self._match_text_seq("MODE", advance=False): 7492 with_expressions.append(f"{self._prev.text.upper()} MODE") 7493 self._advance() 7494 else: 7495 buckets = self._parse_number() 7496 if self._match_text_seq("BUCKETS"): 7497 with_expressions.append(f"{buckets} BUCKETS") 7498 if with_expressions: 7499 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7500 7501 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7502 TokenType.UPDATE, advance=False 7503 ): 7504 update_options = self._prev.text.upper() 7505 self._advance() 7506 elif self._match_text_seq("USING", "DATA"): 7507 expression = self.expression(exp.UsingData, this=self._parse_string()) 7508 7509 return self.expression( 7510 exp.AnalyzeHistogram, 7511 this=this, 7512 expressions=expressions, 7513 expression=expression, 7514 update_options=update_options, 7515 ) 7516 7517 def _parse_merge(self) -> exp.Merge: 7518 self._match(TokenType.INTO) 7519 target = self._parse_table() 7520 7521 if target and self._match(TokenType.ALIAS, advance=False): 7522 target.set("alias", self._parse_table_alias()) 7523 7524 self._match(TokenType.USING) 7525 using = self._parse_table() 7526 7527 self._match(TokenType.ON) 7528 on = self._parse_assignment() 7529 7530 return self.expression( 7531 exp.Merge, 7532 this=target, 7533 using=using, 7534 on=on, 7535 whens=self._parse_when_matched(), 7536 
returning=self._parse_returning(), 7537 ) 7538 7539 def _parse_when_matched(self) -> exp.Whens: 7540 whens = [] 7541 7542 while self._match(TokenType.WHEN): 7543 matched = not self._match(TokenType.NOT) 7544 self._match_text_seq("MATCHED") 7545 source = ( 7546 False 7547 if self._match_text_seq("BY", "TARGET") 7548 else self._match_text_seq("BY", "SOURCE") 7549 ) 7550 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7551 7552 self._match(TokenType.THEN) 7553 7554 if self._match(TokenType.INSERT): 7555 this = self._parse_star() 7556 if this: 7557 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7558 else: 7559 then = self.expression( 7560 exp.Insert, 7561 this=exp.var("ROW") 7562 if self._match_text_seq("ROW") 7563 else self._parse_value(values=False), 7564 expression=self._match_text_seq("VALUES") and self._parse_value(), 7565 ) 7566 elif self._match(TokenType.UPDATE): 7567 expressions = self._parse_star() 7568 if expressions: 7569 then = self.expression(exp.Update, expressions=expressions) 7570 else: 7571 then = self.expression( 7572 exp.Update, 7573 expressions=self._match(TokenType.SET) 7574 and self._parse_csv(self._parse_equality), 7575 ) 7576 elif self._match(TokenType.DELETE): 7577 then = self.expression(exp.Var, this=self._prev.text) 7578 else: 7579 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7580 7581 whens.append( 7582 self.expression( 7583 exp.When, 7584 matched=matched, 7585 source=source, 7586 condition=condition, 7587 then=then, 7588 ) 7589 ) 7590 return self.expression(exp.Whens, expressions=whens) 7591 7592 def _parse_show(self) -> t.Optional[exp.Expression]: 7593 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7594 if parser: 7595 return parser(self) 7596 return self._parse_as_command(self._prev) 7597 7598 def _parse_set_item_assignment( 7599 self, kind: t.Optional[str] = None 7600 ) -> t.Optional[exp.Expression]: 7601 index = self._index 7602 7603 if kind in 
("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7604 return self._parse_set_transaction(global_=kind == "GLOBAL") 7605 7606 left = self._parse_primary() or self._parse_column() 7607 assignment_delimiter = self._match_texts(("=", "TO")) 7608 7609 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7610 self._retreat(index) 7611 return None 7612 7613 right = self._parse_statement() or self._parse_id_var() 7614 if isinstance(right, (exp.Column, exp.Identifier)): 7615 right = exp.var(right.name) 7616 7617 this = self.expression(exp.EQ, this=left, expression=right) 7618 return self.expression(exp.SetItem, this=this, kind=kind) 7619 7620 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7621 self._match_text_seq("TRANSACTION") 7622 characteristics = self._parse_csv( 7623 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7624 ) 7625 return self.expression( 7626 exp.SetItem, 7627 expressions=characteristics, 7628 kind="TRANSACTION", 7629 **{"global": global_}, # type: ignore 7630 ) 7631 7632 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7633 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7634 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7635 7636 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7637 index = self._index 7638 set_ = self.expression( 7639 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7640 ) 7641 7642 if self._curr: 7643 self._retreat(index) 7644 return self._parse_as_command(self._prev) 7645 7646 return set_ 7647 7648 def _parse_var_from_options( 7649 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7650 ) -> t.Optional[exp.Var]: 7651 start = self._curr 7652 if not start: 7653 return None 7654 7655 option = start.text.upper() 7656 continuations = options.get(option) 7657 7658 index = self._index 7659 self._advance() 7660 for 
keywords in continuations or []: 7661 if isinstance(keywords, str): 7662 keywords = (keywords,) 7663 7664 if self._match_text_seq(*keywords): 7665 option = f"{option} {' '.join(keywords)}" 7666 break 7667 else: 7668 if continuations or continuations is None: 7669 if raise_unmatched: 7670 self.raise_error(f"Unknown option {option}") 7671 7672 self._retreat(index) 7673 return None 7674 7675 return exp.var(option) 7676 7677 def _parse_as_command(self, start: Token) -> exp.Command: 7678 while self._curr: 7679 self._advance() 7680 text = self._find_sql(start, self._prev) 7681 size = len(start.text) 7682 self._warn_unsupported() 7683 return exp.Command(this=text[:size], expression=text[size:]) 7684 7685 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7686 settings = [] 7687 7688 self._match_l_paren() 7689 kind = self._parse_id_var() 7690 7691 if self._match(TokenType.L_PAREN): 7692 while True: 7693 key = self._parse_id_var() 7694 value = self._parse_primary() 7695 if not key and value is None: 7696 break 7697 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7698 self._match(TokenType.R_PAREN) 7699 7700 self._match_r_paren() 7701 7702 return self.expression( 7703 exp.DictProperty, 7704 this=this, 7705 kind=kind.this if kind else None, 7706 settings=settings, 7707 ) 7708 7709 def _parse_dict_range(self, this: str) -> exp.DictRange: 7710 self._match_l_paren() 7711 has_min = self._match_text_seq("MIN") 7712 if has_min: 7713 min = self._parse_var() or self._parse_primary() 7714 self._match_text_seq("MAX") 7715 max = self._parse_var() or self._parse_primary() 7716 else: 7717 max = self._parse_var() or self._parse_primary() 7718 min = exp.Literal.number(0) 7719 self._match_r_paren() 7720 return self.expression(exp.DictRange, this=this, min=min, max=max) 7721 7722 def _parse_comprehension( 7723 self, this: t.Optional[exp.Expression] 7724 ) -> t.Optional[exp.Comprehension]: 7725 index = self._index 7726 expression = self._parse_column() 
7727 if not self._match(TokenType.IN): 7728 self._retreat(index - 1) 7729 return None 7730 iterator = self._parse_column() 7731 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7732 return self.expression( 7733 exp.Comprehension, 7734 this=this, 7735 expression=expression, 7736 iterator=iterator, 7737 condition=condition, 7738 ) 7739 7740 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7741 if self._match(TokenType.HEREDOC_STRING): 7742 return self.expression(exp.Heredoc, this=self._prev.text) 7743 7744 if not self._match_text_seq("$"): 7745 return None 7746 7747 tags = ["$"] 7748 tag_text = None 7749 7750 if self._is_connected(): 7751 self._advance() 7752 tags.append(self._prev.text.upper()) 7753 else: 7754 self.raise_error("No closing $ found") 7755 7756 if tags[-1] != "$": 7757 if self._is_connected() and self._match_text_seq("$"): 7758 tag_text = tags[-1] 7759 tags.append("$") 7760 else: 7761 self.raise_error("No closing $ found") 7762 7763 heredoc_start = self._curr 7764 7765 while self._curr: 7766 if self._match_text_seq(*tags, advance=False): 7767 this = self._find_sql(heredoc_start, self._prev) 7768 self._advance(len(tags)) 7769 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7770 7771 self._advance() 7772 7773 self.raise_error(f"No closing {''.join(tags)} found") 7774 return None 7775 7776 def _find_parser( 7777 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7778 ) -> t.Optional[t.Callable]: 7779 if not self._curr: 7780 return None 7781 7782 index = self._index 7783 this = [] 7784 while True: 7785 # The current token might be multiple words 7786 curr = self._curr.text.upper() 7787 key = curr.split(" ") 7788 this.append(curr) 7789 7790 self._advance() 7791 result, trie = in_trie(trie, key) 7792 if result == TrieResult.FAILED: 7793 break 7794 7795 if result == TrieResult.EXISTS: 7796 subparser = parsers[" ".join(this)] 7797 return subparser 7798 7799 self._retreat(index) 7800 return None 7801 7802 def 
_match(self, token_type, advance=True, expression=None): 7803 if not self._curr: 7804 return None 7805 7806 if self._curr.token_type == token_type: 7807 if advance: 7808 self._advance() 7809 self._add_comments(expression) 7810 return True 7811 7812 return None 7813 7814 def _match_set(self, types, advance=True): 7815 if not self._curr: 7816 return None 7817 7818 if self._curr.token_type in types: 7819 if advance: 7820 self._advance() 7821 return True 7822 7823 return None 7824 7825 def _match_pair(self, token_type_a, token_type_b, advance=True): 7826 if not self._curr or not self._next: 7827 return None 7828 7829 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7830 if advance: 7831 self._advance(2) 7832 return True 7833 7834 return None 7835 7836 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7837 if not self._match(TokenType.L_PAREN, expression=expression): 7838 self.raise_error("Expecting (") 7839 7840 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7841 if not self._match(TokenType.R_PAREN, expression=expression): 7842 self.raise_error("Expecting )") 7843 7844 def _match_texts(self, texts, advance=True): 7845 if ( 7846 self._curr 7847 and self._curr.token_type != TokenType.STRING 7848 and self._curr.text.upper() in texts 7849 ): 7850 if advance: 7851 self._advance() 7852 return True 7853 return None 7854 7855 def _match_text_seq(self, *texts, advance=True): 7856 index = self._index 7857 for text in texts: 7858 if ( 7859 self._curr 7860 and self._curr.token_type != TokenType.STRING 7861 and self._curr.text.upper() == text 7862 ): 7863 self._advance() 7864 else: 7865 self._retreat(index) 7866 return None 7867 7868 if not advance: 7869 self._retreat(index) 7870 7871 return True 7872 7873 def _replace_lambda( 7874 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7875 ) -> t.Optional[exp.Expression]: 7876 if not node: 7877 return node 7878 
7879 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7880 7881 for column in node.find_all(exp.Column): 7882 typ = lambda_types.get(column.parts[0].name) 7883 if typ is not None: 7884 dot_or_id = column.to_dot() if column.table else column.this 7885 7886 if typ: 7887 dot_or_id = self.expression( 7888 exp.Cast, 7889 this=dot_or_id, 7890 to=typ, 7891 ) 7892 7893 parent = column.parent 7894 7895 while isinstance(parent, exp.Dot): 7896 if not isinstance(parent.parent, exp.Dot): 7897 parent.replace(dot_or_id) 7898 break 7899 parent = parent.parent 7900 else: 7901 if column is node: 7902 node = dot_or_id 7903 else: 7904 column.replace(dot_or_id) 7905 return node 7906 7907 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7908 start = self._prev 7909 7910 # Not to be confused with TRUNCATE(number, decimals) function call 7911 if self._match(TokenType.L_PAREN): 7912 self._retreat(self._index - 2) 7913 return self._parse_function() 7914 7915 # Clickhouse supports TRUNCATE DATABASE as well 7916 is_database = self._match(TokenType.DATABASE) 7917 7918 self._match(TokenType.TABLE) 7919 7920 exists = self._parse_exists(not_=False) 7921 7922 expressions = self._parse_csv( 7923 lambda: self._parse_table(schema=True, is_db_reference=is_database) 7924 ) 7925 7926 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7927 7928 if self._match_text_seq("RESTART", "IDENTITY"): 7929 identity = "RESTART" 7930 elif self._match_text_seq("CONTINUE", "IDENTITY"): 7931 identity = "CONTINUE" 7932 else: 7933 identity = None 7934 7935 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7936 option = self._prev.text 7937 else: 7938 option = None 7939 7940 partition = self._parse_partition() 7941 7942 # Fallback case 7943 if self._curr: 7944 return self._parse_as_command(start) 7945 7946 return self.expression( 7947 exp.TruncateTable, 7948 expressions=expressions, 7949 is_database=is_database, 7950 
exists=exists, 7951 cluster=cluster, 7952 identity=identity, 7953 option=option, 7954 partition=partition, 7955 ) 7956 7957 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7958 this = self._parse_ordered(self._parse_opclass) 7959 7960 if not self._match(TokenType.WITH): 7961 return this 7962 7963 op = self._parse_var(any_token=True) 7964 7965 return self.expression(exp.WithOperator, this=this, op=op) 7966 7967 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 7968 self._match(TokenType.EQ) 7969 self._match(TokenType.L_PAREN) 7970 7971 opts: t.List[t.Optional[exp.Expression]] = [] 7972 option: exp.Expression | None 7973 while self._curr and not self._match(TokenType.R_PAREN): 7974 if self._match_text_seq("FORMAT_NAME", "="): 7975 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 7976 option = self._parse_format_name() 7977 else: 7978 option = self._parse_property() 7979 7980 if option is None: 7981 self.raise_error("Unable to parse option") 7982 break 7983 7984 opts.append(option) 7985 7986 return opts 7987 7988 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 7989 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 7990 7991 options = [] 7992 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 7993 option = self._parse_var(any_token=True) 7994 prev = self._prev.text.upper() 7995 7996 # Different dialects might separate options and values by white space, "=" and "AS" 7997 self._match(TokenType.EQ) 7998 self._match(TokenType.ALIAS) 7999 8000 param = self.expression(exp.CopyParameter, this=option) 8001 8002 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8003 TokenType.L_PAREN, advance=False 8004 ): 8005 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8006 param.set("expressions", self._parse_wrapped_options()) 8007 elif prev == "FILE_FORMAT": 8008 # T-SQL's external file format case 8009 param.set("expression", self._parse_field()) 8010 
else: 8011 param.set("expression", self._parse_unquoted_field()) 8012 8013 options.append(param) 8014 self._match(sep) 8015 8016 return options 8017 8018 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8019 expr = self.expression(exp.Credentials) 8020 8021 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8022 expr.set("storage", self._parse_field()) 8023 if self._match_text_seq("CREDENTIALS"): 8024 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8025 creds = ( 8026 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8027 ) 8028 expr.set("credentials", creds) 8029 if self._match_text_seq("ENCRYPTION"): 8030 expr.set("encryption", self._parse_wrapped_options()) 8031 if self._match_text_seq("IAM_ROLE"): 8032 expr.set("iam_role", self._parse_field()) 8033 if self._match_text_seq("REGION"): 8034 expr.set("region", self._parse_field()) 8035 8036 return expr 8037 8038 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8039 return self._parse_field() 8040 8041 def _parse_copy(self) -> exp.Copy | exp.Command: 8042 start = self._prev 8043 8044 self._match(TokenType.INTO) 8045 8046 this = ( 8047 self._parse_select(nested=True, parse_subquery_alias=False) 8048 if self._match(TokenType.L_PAREN, advance=False) 8049 else self._parse_table(schema=True) 8050 ) 8051 8052 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8053 8054 files = self._parse_csv(self._parse_file_location) 8055 credentials = self._parse_credentials() 8056 8057 self._match_text_seq("WITH") 8058 8059 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8060 8061 # Fallback case 8062 if self._curr: 8063 return self._parse_as_command(start) 8064 8065 return self.expression( 8066 exp.Copy, 8067 this=this, 8068 kind=kind, 8069 credentials=credentials, 8070 files=files, 8071 params=params, 8072 ) 8073 8074 def _parse_normalize(self) -> exp.Normalize: 8075 return self.expression( 8076 
exp.Normalize, 8077 this=self._parse_bitwise(), 8078 form=self._match(TokenType.COMMA) and self._parse_var(), 8079 ) 8080 8081 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8082 args = self._parse_csv(lambda: self._parse_lambda()) 8083 8084 this = seq_get(args, 0) 8085 decimals = seq_get(args, 1) 8086 8087 return expr_type( 8088 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8089 ) 8090 8091 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8092 if self._match_text_seq("COLUMNS", "(", advance=False): 8093 this = self._parse_function() 8094 if isinstance(this, exp.Columns): 8095 this.set("unpack", True) 8096 return this 8097 8098 return self.expression( 8099 exp.Star, 8100 **{ # type: ignore 8101 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8102 "replace": self._parse_star_op("REPLACE"), 8103 "rename": self._parse_star_op("RENAME"), 8104 }, 8105 ) 8106 8107 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8108 privilege_parts = [] 8109 8110 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8111 # (end of privilege list) or L_PAREN (start of column list) are met 8112 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8113 privilege_parts.append(self._curr.text.upper()) 8114 self._advance() 8115 8116 this = exp.var(" ".join(privilege_parts)) 8117 expressions = ( 8118 self._parse_wrapped_csv(self._parse_column) 8119 if self._match(TokenType.L_PAREN, advance=False) 8120 else None 8121 ) 8122 8123 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8124 8125 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8126 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8127 principal = self._parse_id_var() 8128 8129 if not principal: 8130 return None 8131 8132 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8133 8134 def 
_parse_grant(self) -> exp.Grant | exp.Command: 8135 start = self._prev 8136 8137 privileges = self._parse_csv(self._parse_grant_privilege) 8138 8139 self._match(TokenType.ON) 8140 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8141 8142 # Attempt to parse the securable e.g. MySQL allows names 8143 # such as "foo.*", "*.*" which are not easily parseable yet 8144 securable = self._try_parse(self._parse_table_parts) 8145 8146 if not securable or not self._match_text_seq("TO"): 8147 return self._parse_as_command(start) 8148 8149 principals = self._parse_csv(self._parse_grant_principal) 8150 8151 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8152 8153 if self._curr: 8154 return self._parse_as_command(start) 8155 8156 return self.expression( 8157 exp.Grant, 8158 privileges=privileges, 8159 kind=kind, 8160 securable=securable, 8161 principals=principals, 8162 grant_option=grant_option, 8163 ) 8164 8165 def _parse_overlay(self) -> exp.Overlay: 8166 return self.expression( 8167 exp.Overlay, 8168 **{ # type: ignore 8169 "this": self._parse_bitwise(), 8170 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8171 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8172 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8173 }, 8174 ) 8175 8176 def _parse_format_name(self) -> exp.Property: 8177 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8178 # for FILE_FORMAT = <format_name> 8179 return self.expression( 8180 exp.Property, 8181 this=exp.var("FORMAT_NAME"), 8182 value=self._parse_string() or self._parse_table_parts(), 8183 )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
def __init__(
    self,
    error_level: t.Optional[ErrorLevel] = None,
    error_message_context: int = 100,
    max_errors: int = 3,
    dialect: DialectType = None,
):
    """
    Stores the parser configuration and resets the parsing state.

    Args:
        error_level: The desired error level. A falsy value selects ErrorLevel.IMMEDIATE.
        error_message_context: The number of characters of the query string captured
            around an error when building its message.
        max_errors: Maximum number of error messages to include in a raised ParseError.
        dialect: The dialect specifier, resolved through `Dialect.get_or_raise`.
    """
    # Function-scope import, as in the original (presumably to avoid a circular import
    # between the parser and the dialects package -- confirm before moving it).
    from sqlglot.dialects import Dialect

    self.error_level = error_level if error_level else ErrorLevel.IMMEDIATE
    self.error_message_context = error_message_context
    self.max_errors = max_errors

    resolved_dialect = Dialect.get_or_raise(dialect)
    self.dialect = resolved_dialect

    self.reset()
def parse(
    self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
) -> t.List[t.Optional[exp.Expression]]:
    """
    Turns a token list into syntax trees, producing one tree per SQL statement.

    Args:
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The list of the produced syntax trees.
    """
    # Looked up on the class so the unbound statement parser is handed to `_parse`.
    statement_parser = self.__class__._parse_statement
    return self._parse(parse_method=statement_parser, raw_tokens=raw_tokens, sql=sql)
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
def parse_into(
    self,
    expression_types: exp.IntoType,
    raw_tokens: t.List[Token],
    sql: t.Optional[str] = None,
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens into a given Expression type. If a collection of Expression
    types is given instead, this method will try to parse the token list into each one
    of them, stopping at the first for which the parsing succeeds.

    Args:
        expression_types: The expression type(s) to try and parse the token list into.
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The list of syntax trees produced by the first expression type that parses
        successfully.

    Raises:
        TypeError: If no parser is registered for one of the expression types.
        ParseError: If the token list cannot be parsed into any of the expression types
            (including the case where no expression types were supplied).
    """
    errors = []
    for expression_type in ensure_list(expression_types):
        parser = self.EXPRESSION_PARSERS.get(expression_type)
        if not parser:
            raise TypeError(f"No parser registered for {expression_type}")

        try:
            return self._parse(parser, raw_tokens, sql)
        except ParseError as e:
            # Record which target type this failure belongs to before trying the next one.
            e.errors[0]["into_expression"] = expression_type
            errors.append(e)

    # With an empty collection of expression types nothing was attempted, so there is no
    # previous failure to chain from; indexing `errors[-1]` unconditionally would raise
    # IndexError and mask the ParseError below.
    cause = errors[-1] if errors else None

    raise ParseError(
        f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
        errors=merge_errors(errors),
    ) from cause
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of syntax trees produced by the first expression type for which parsing succeeds.
def check_errors(self) -> None:
    """Reports the recorded errors: logs them under WARN, raises them together under RAISE."""
    level = self.error_level

    if level == ErrorLevel.WARN:
        for recorded in self.errors:
            logger.error(str(recorded))
        return

    if level == ErrorLevel.RAISE and self.errors:
        # The message is capped at `max_errors` entries; the attached error list is not.
        summary = concat_messages(self.errors, self.max_errors)
        raise ParseError(summary, errors=merge_errors(self.errors))
Logs or raises any found errors, depending on the chosen error level setting.
def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
    """
    Records a parse error, or raises it right away when the error level is IMMEDIATE.

    Args:
        message: The description of the error.
        token: The token the error refers to. Falls back to the current token, then the
            previous one, then an empty string token.
    """
    token = token or self._curr or self._prev or Token.string("")
    context_size = self.error_message_context

    # The highlighted span covers the token itself (`end` is inclusive on the token).
    span_start = token.start
    span_end = token.end + 1

    before = self.sql[max(span_start - context_size, 0) : span_start]
    underlined = self.sql[span_start:span_end]
    after = self.sql[span_end : span_end + context_size]

    error = ParseError.new(
        f"{message}. Line {token.line}, Col: {token.col}.\n"
        f" {before}\033[4m{underlined}\033[0m{after}",
        description=message,
        line=token.line,
        col=token.col,
        start_context=before,
        highlight=underlined,
        end_context=after,
    )

    if self.error_level == ErrorLevel.IMMEDIATE:
        raise error

    self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
def expression(
    self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
) -> E:
    """
    Builds an Expression of the requested class, attaches comments and validates it.

    Args:
        exp_class: The expression class to instantiate.
        comments: An optional list of comments to attach to the expression. When omitted
            (or empty), comment attachment is delegated to `self._add_comments`.
        kwargs: The arguments to set for the expression along with their respective values.

    Returns:
        The target expression, as returned by `validate_expression`.
    """
    node = exp_class(**kwargs)

    if comments:
        node.add_comments(comments)
    else:
        self._add_comments(node)

    return self.validate_expression(node)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
    """
    Checks an Expression and reports every error message it produces.

    Validation is skipped entirely when the error level is `ErrorLevel.IGNORE`. Otherwise,
    each message yielded by `expression.error_messages(args)` is passed to `raise_error`.

    Args:
        expression: The expression to validate.
        args: An optional list of items that was used to instantiate the expression, if it's a Func.

    Returns:
        The same expression that was passed in.
    """
    if self.error_level == ErrorLevel.IGNORE:
        return expression

    for problem in expression.error_messages(args):
        self.raise_error(problem)

    return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
def parse_set_operation(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    """
    Parses a set operation (UNION, EXCEPT or INTERSECT) whose left operand is `this`.

    Args:
        this: The expression on the left-hand side of the set operation.

    Returns:
        The set operation expression, or None when the upcoming tokens do not form a set
        operation. In the latter case the parser is moved back to where it started.
    """
    rewind_index = self._index
    _, side_token, kind_token = self._parse_join_parts()

    side = None
    if side_token:
        side = side_token.text

    kind = None
    if kind_token:
        kind = kind_token.text

    if not self._match_set(self.SET_OPERATIONS):
        # Not a set operation: undo whatever the join-parts lookahead consumed.
        self._retreat(rewind_index)
        return None

    # The token that was just matched decides the operation and carries its comments.
    operator_token = self._prev
    token_type = operator_token.token_type

    operation: t.Type[exp.SetOperation] = exp.Intersect
    if token_type == TokenType.UNION:
        operation = exp.Union
    elif token_type == TokenType.EXCEPT:
        operation = exp.Except

    comments = operator_token.comments

    distinct: t.Optional[bool]
    if self._match(TokenType.DISTINCT):
        distinct = True
    elif self._match(TokenType.ALL):
        distinct = False
    else:
        # Neither keyword is present, so the dialect's default for this operation applies.
        distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
        if distinct is None:
            self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

    by_name = self._match_text_seq("BY", "NAME")
    if not by_name:
        by_name = self._match_text_seq("STRICT", "CORRESPONDING")

    if self._match_text_seq("CORRESPONDING"):
        by_name = True
        if not (side or kind):
            kind = "INNER"

    # A column list is only looked for when the operation matches columns by name.
    on_column_list = (
        self._parse_wrapped_csv(self._parse_column)
        if by_name and self._match_texts(("ON", "BY"))
        else None
    )

    right_operand = self._parse_select(nested=True, parse_set_operation=False)

    return self.expression(
        operation,
        comments=comments,
        this=this,
        distinct=distinct,
        by_name=by_name,
        expression=right_operand,
        side=side,
        kind=kind,
        on=on_column_list,
    )