sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")
    TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor)

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)
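

# A usage sketch (not part of the original source): these builders receive the
# already-parsed argument list and return an AST node. build_mod, for instance,
# parenthesizes binary operands so the operator form renders unambiguously.
# Assuming the sqlglot package is installed:
#
#     from sqlglot import parse_one
#
#     tree = parse_one("SELECT MOD(a + 1, 7)")
#     assert tree.sql() == "SELECT (a + 1) % 7"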


def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True):
    return exp.Trim(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position="LEADING" if is_left else "TRAILING",
    )


def build_coalesce(args: t.List, is_nvl: t.Optional[bool] = None) -> exp.Coalesce:
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl)


def build_locate_strposition(args: t.List):
    return exp.StrPosition(
        this=seq_get(args, 1),
        substr=seq_get(args, 0),
        position=seq_get(args, 2),
    )


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """
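
    # A sketch (not part of the original source): FUNCTIONS maps an upper-cased
    # function name to a builder that receives the parsed argument list (and the
    # active dialect, for builders declared with a `dialect` parameter):
    #
    #     from sqlglot import exp, parse_one
    #
    #     tree = parse_one("SELECT IFNULL(a, b)")
    #     assert isinstance(tree.selects[0], exp.Coalesce)  # via build_coalesce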

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "CHAR": lambda args: exp.Chr(expressions=args),
        "CHR": lambda args: exp.Chr(expressions=args),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "STRPOS": exp.StrPosition.from_arg_list,
        "CHARINDEX": lambda args: build_locate_strposition(args),
        "INSTR": exp.StrPosition.from_arg_list,
        "LOCATE": lambda args: build_locate_strposition(args),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.DYNAMIC,
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME2,
        TokenType.DATETIME64,
        TokenType.SMALLDATETIME,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.DECIMAL256,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.POINT,
        TokenType.RING,
        TokenType.LINESTRING,
        TokenType.MULTILINESTRING,
        TokenType.POLYGON,
        TokenType.MULTIPOLYGON,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        TokenType.DYNAMIC,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.MODEL,
        TokenType.NAMESPACE,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.SINK,
        TokenType.SOURCE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.STREAMLIT,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.ATTACH,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.CURRENT_SCHEMA,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DETACH,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.EXPORT,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LIMIT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
    ID_VAR_TOKENS.remove(TokenType.UNION)

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_SCHEMA,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
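
    # A sketch (not part of the original source) of what COLUMN_OPERATORS produces:
    #
    #     from sqlglot import exp, parse_one
    #
    #     cast = parse_one("SELECT x::INT").selects[0]
    #     assert isinstance(cast, exp.Cast)  # STRICT_CAST is True by default
    #
    #     extract = parse_one("SELECT j -> 'k'", read="postgres").selects[0]
    #     assert isinstance(extract, exp.JSONExtract)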

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Hint: lambda self: self._parse_hint_body(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Tuple: lambda self: self._parse_value(),
        exp.Whens: lambda self: self._parse_when_matched(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.ANALYZE: lambda self: self._parse_analyze(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: exp.Semicolon(),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
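
    # A sketch (not part of the original source): RANGE_PARSERS extend an
    # already-parsed left-hand operand, e.g. binary_range_parser(exp.ILike) turns
    # `x ILIKE 'a%'` into exp.ILike(this=x, expression='a%'), while BETWEEN and IN
    # get dedicated parse methods:
    #
    #     from sqlglot import exp, parse_one
    #
    #     predicate = parse_one("SELECT 1 WHERE x BETWEEN 1 AND 2").find(exp.Between)
    #     assert predicate is not None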

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
        "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EMPTY": lambda self: self.expression(exp.EmptyProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SECURITY": lambda self: self._parse_security(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WATERMARK": lambda self: self.expression(
            exp.WatermarkColumnConstraint,
            this=self._match(TokenType.FOR) and self._parse_column(),
            expression=self._match(TokenType.ALIAS) and self._parse_disjunction(),
        ),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "SWAP": lambda self: self.expression(
            exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
        ),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
        "WATERMARK",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "NORMALIZE": lambda self: self._parse_normalize(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "OVERLAY": lambda self: self._parse_overlay(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
        "XMLELEMENT": lambda self: self.expression(
            exp.XMLElement,
            this=self._match_text_seq("NAME") and self._parse_id_var(),
            expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression),
        ),
        "XMLTABLE": lambda self: self._parse_xml_table(),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }
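
    # A sketch (not part of the original source): QUERY_MODIFIER_PARSERS route each
    # clause keyword to a parser and store the result under the given arg name:
    #
    #     from sqlglot import parse_one
    #
    #     select = parse_one("SELECT a FROM t WHERE a > 1 LIMIT 5")
    #     assert select.args["where"] is not None
    #     assert select.args["limit"] is not None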

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

    EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    # The style options for the ANALYZE statement
    ANALYZE_STYLES = {
        "BUFFER_USAGE_LIMIT",
        "FULL",
        "LOCAL",
        "NO_WRITE_TO_BINLOG",
        "SAMPLE",
        "SKIP_LOCKED",
        "VERBOSE",
    }

    ANALYZE_EXPRESSION_PARSERS = {
        "ALL": lambda self: self._parse_analyze_columns(),
        "COMPUTE": lambda self: self._parse_analyze_statistics(),
        "DELETE": lambda self: self._parse_analyze_delete(),
        "DROP": lambda self: self._parse_analyze_histogram(),
        "ESTIMATE": lambda self: self._parse_analyze_statistics(),
        "LIST": lambda self: self._parse_analyze_list(),
        "PREDICATE": lambda self: self._parse_analyze_columns(),
        "UPDATE": lambda self: self._parse_analyze_histogram(),
        "VALIDATE": lambda self: self._parse_analyze_validate(),
    }

    PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}

    AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

    OPERATION_MODIFIERS: t.Set[str] = set()

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False
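
    # A sketch (not part of the original source, subclass name hypothetical):
    # dialects tune parsing behavior by overriding these class-level flags in
    # their Parser subclass, e.g.:
    #
    #     class MyDialectParser(Parser):
    #         STRICT_CAST = False          # CAST(...) then parses as exp.TryCast
    #         LOG_DEFAULTS_TO_LN = True    # single-argument LOG(x) parses as exp.Ln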

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = True

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
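
    # A sketch (not part of the original source): parse() yields one tree per
    # statement, splitting the token stream on semicolons. Typically the tokens
    # come from the same dialect's tokenizer:
    #
    #     from sqlglot import Dialect
    #
    #     d = Dialect.get_or_raise(None)  # the default dialect
    #     trees = d.parser().parse(d.tokenize("SELECT 1; SELECT 2"))
    #     assert len(trees) == 2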
1488 """ 1489 return self._parse( 1490 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1491 ) 1492 1493 def parse_into( 1494 self, 1495 expression_types: exp.IntoType, 1496 raw_tokens: t.List[Token], 1497 sql: t.Optional[str] = None, 1498 ) -> t.List[t.Optional[exp.Expression]]: 1499 """ 1500 Parses a list of tokens into a given Expression type. If a collection of Expression 1501 types is given instead, this method will try to parse the token list into each one 1502 of them, stopping at the first for which the parsing succeeds. 1503 1504 Args: 1505 expression_types: The expression type(s) to try and parse the token list into. 1506 raw_tokens: The list of tokens. 1507 sql: The original SQL string, used to produce helpful debug messages. 1508 1509 Returns: 1510 The target Expression. 1511 """ 1512 errors = [] 1513 for expression_type in ensure_list(expression_types): 1514 parser = self.EXPRESSION_PARSERS.get(expression_type) 1515 if not parser: 1516 raise TypeError(f"No parser registered for {expression_type}") 1517 1518 try: 1519 return self._parse(parser, raw_tokens, sql) 1520 except ParseError as e: 1521 e.errors[0]["into_expression"] = expression_type 1522 errors.append(e) 1523 1524 raise ParseError( 1525 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1526 errors=merge_errors(errors), 1527 ) from errors[-1] 1528 1529 def _parse( 1530 self, 1531 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1532 raw_tokens: t.List[Token], 1533 sql: t.Optional[str] = None, 1534 ) -> t.List[t.Optional[exp.Expression]]: 1535 self.reset() 1536 self.sql = sql or "" 1537 1538 total = len(raw_tokens) 1539 chunks: t.List[t.List[Token]] = [[]] 1540 1541 for i, token in enumerate(raw_tokens): 1542 if token.token_type == TokenType.SEMICOLON: 1543 if token.comments: 1544 chunks.append([token]) 1545 1546 if i < total - 1: 1547 chunks.append([]) 1548 else: 1549 chunks[-1].append(token) 1550 1551 expressions = [] 1552 1553 for tokens in chunks: 1554 self._index = -1 1555 self._tokens = tokens 1556 self._advance() 1557 1558 expressions.append(parse_method(self)) 1559 1560 if self._index < len(self._tokens): 1561 self.raise_error("Invalid expression / Unexpected token") 1562 1563 self.check_errors() 1564 1565 return expressions 1566 1567 def check_errors(self) -> None: 1568 """Logs or raises any found errors, depending on the chosen error level setting.""" 1569 if self.error_level == ErrorLevel.WARN: 1570 for error in self.errors: 1571 logger.error(str(error)) 1572 elif self.error_level == ErrorLevel.RAISE and self.errors: 1573 raise ParseError( 1574 concat_messages(self.errors, self.max_errors), 1575 errors=merge_errors(self.errors), 1576 ) 1577 1578 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1579 """ 1580 Appends an error in the list of recorded errors or raises it, depending on the chosen 1581 error level setting. 1582 """ 1583 token = token or self._curr or self._prev or Token.string("") 1584 start = token.start 1585 end = token.end + 1 1586 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1587 highlight = self.sql[start:end] 1588 end_context = self.sql[end : end + self.error_message_context] 1589 1590 error = ParseError.new( 1591 f"{message}. 
Line {token.line}, Col: {token.col}.\n" 1592 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1593 description=message, 1594 line=token.line, 1595 col=token.col, 1596 start_context=start_context, 1597 highlight=highlight, 1598 end_context=end_context, 1599 ) 1600 1601 if self.error_level == ErrorLevel.IMMEDIATE: 1602 raise error 1603 1604 self.errors.append(error) 1605 1606 def expression( 1607 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1608 ) -> E: 1609 """ 1610 Creates a new, validated Expression. 1611 1612 Args: 1613 exp_class: The expression class to instantiate. 1614 comments: An optional list of comments to attach to the expression. 1615 kwargs: The arguments to set for the expression along with their respective values. 1616 1617 Returns: 1618 The target expression. 1619 """ 1620 instance = exp_class(**kwargs) 1621 instance.add_comments(comments) if comments else self._add_comments(instance) 1622 return self.validate_expression(instance) 1623 1624 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1625 if expression and self._prev_comments: 1626 expression.add_comments(self._prev_comments) 1627 self._prev_comments = None 1628 1629 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1630 """ 1631 Validates an Expression, making sure that all its mandatory arguments are set. 1632 1633 Args: 1634 expression: The expression to validate. 1635 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1636 1637 Returns: 1638 The validated expression. 1639 """ 1640 if self.error_level != ErrorLevel.IGNORE: 1641 for error_message in expression.error_messages(args): 1642 self.raise_error(error_message) 1643 1644 return expression 1645 1646 def _find_sql(self, start: Token, end: Token) -> str: 1647 return self.sql[start.start : end.end + 1] 1648 1649 def _is_connected(self) -> bool: 1650 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1651 1652 def _advance(self, times: int = 1) -> None: 1653 self._index += times 1654 self._curr = seq_get(self._tokens, self._index) 1655 self._next = seq_get(self._tokens, self._index + 1) 1656 1657 if self._index > 0: 1658 self._prev = self._tokens[self._index - 1] 1659 self._prev_comments = self._prev.comments 1660 else: 1661 self._prev = None 1662 self._prev_comments = None 1663 1664 def _retreat(self, index: int) -> None: 1665 if index != self._index: 1666 self._advance(index - self._index) 1667 1668 def _warn_unsupported(self) -> None: 1669 if len(self._tokens) <= 1: 1670 return 1671 1672 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1673 # interested in emitting a warning for the one being currently processed. 1674 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1675 1676 logger.warning( 1677 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1678 ) 1679 1680 def _parse_command(self) -> exp.Command: 1681 self._warn_unsupported() 1682 return self.expression( 1683 exp.Command, 1684 comments=self._prev_comments, 1685 this=self._prev.text.upper(), 1686 expression=self._parse_string(), 1687 ) 1688 1689 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1690 """ 1691 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
1692 This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to 1693 solve this by setting & resetting the parser state accordingly. 1694 """ 1695 index = self._index 1696 error_level = self.error_level 1697 1698 self.error_level = ErrorLevel.IMMEDIATE 1699 try: 1700 this = parse_method() 1701 except ParseError: 1702 this = None 1703 finally: 1704 if not this or retreat: 1705 self._retreat(index) 1706 self.error_level = error_level 1707 1708 return this 1709 1710 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1711 start = self._prev 1712 exists = self._parse_exists() if allow_exists else None 1713 1714 self._match(TokenType.ON) 1715 1716 materialized = self._match_text_seq("MATERIALIZED") 1717 kind = self._match_set(self.CREATABLES) and self._prev 1718 if not kind: 1719 return self._parse_as_command(start) 1720 1721 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1722 this = self._parse_user_defined_function(kind=kind.token_type) 1723 elif kind.token_type == TokenType.TABLE: 1724 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1725 elif kind.token_type == TokenType.COLUMN: 1726 this = self._parse_column() 1727 else: 1728 this = self._parse_id_var() 1729 1730 self._match(TokenType.IS) 1731 1732 return self.expression( 1733 exp.Comment, 1734 this=this, 1735 kind=kind.text, 1736 expression=self._parse_string(), 1737 exists=exists, 1738 materialized=materialized, 1739 ) 1740 1741 def _parse_to_table( 1742 self, 1743 ) -> exp.ToTableProperty: 1744 table = self._parse_table_parts(schema=True) 1745 return self.expression(exp.ToTableProperty, this=table) 1746 1747 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1748 def _parse_ttl(self) -> exp.Expression: 1749 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1750 this = self._parse_bitwise() 1751 1752 if self._match_text_seq("DELETE"): 1753 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1754 if self._match_text_seq("RECOMPRESS"): 1755 return self.expression( 1756 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1757 ) 1758 if self._match_text_seq("TO", "DISK"): 1759 return self.expression( 1760 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1761 ) 1762 if self._match_text_seq("TO", "VOLUME"): 1763 return self.expression( 1764 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1765 ) 1766 1767 return this 1768 1769 expressions = self._parse_csv(_parse_ttl_action) 1770 where = self._parse_where() 1771 group = self._parse_group() 1772 1773 aggregates = None 1774 if group and self._match(TokenType.SET): 1775 aggregates = self._parse_csv(self._parse_set_item) 1776 1777 return self.expression( 1778 exp.MergeTreeTTL, 1779 expressions=expressions, 1780 where=where, 1781 group=group, 1782 aggregates=aggregates, 1783 ) 1784 1785 def _parse_statement(self) -> t.Optional[exp.Expression]: 1786 if self._curr is None: 1787 return None 1788 1789 if self._match_set(self.STATEMENT_PARSERS): 1790 comments = self._prev_comments 1791 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 1792 stmt.add_comments(comments, prepend=True) 1793 return stmt 1794 1795 if self._match_set(self.dialect.tokenizer.COMMANDS): 1796 return self._parse_command() 1797 1798 expression = self._parse_expression() 1799 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1800 return self._parse_query_modifiers(expression) 
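# A minimal usage sketch of the entry points above (illustrative only; names are taken
# from this module and sqlglot.tokens): parse() splits the token stream on semicolons
# and routes each chunk through _parse_statement, while parse_into() tries specific
# target expression types and raises a ParseError if none of them matches.
#
#     from sqlglot import exp
#     from sqlglot.parser import Parser
#     from sqlglot.tokens import Tokenizer
#
#     sql = "SELECT 1; DROP TABLE t"
#     expressions = Parser().parse(Tokenizer().tokenize(sql), sql=sql)
#     # -> [exp.Select, exp.Drop]
#
#     select = Parser().parse_into(exp.Select, Tokenizer().tokenize("SELECT 1"))[0]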
1801 1802 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1803 start = self._prev 1804 temporary = self._match(TokenType.TEMPORARY) 1805 materialized = self._match_text_seq("MATERIALIZED") 1806 1807 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1808 if not kind: 1809 return self._parse_as_command(start) 1810 1811 concurrently = self._match_text_seq("CONCURRENTLY") 1812 if_exists = exists or self._parse_exists() 1813 1814 if kind == "COLUMN": 1815 this = self._parse_column() 1816 else: 1817 this = self._parse_table_parts( 1818 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1819 ) 1820 1821 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1822 1823 if self._match(TokenType.L_PAREN, advance=False): 1824 expressions = self._parse_wrapped_csv(self._parse_types) 1825 else: 1826 expressions = None 1827 1828 return self.expression( 1829 exp.Drop, 1830 exists=if_exists, 1831 this=this, 1832 expressions=expressions, 1833 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1834 temporary=temporary, 1835 materialized=materialized, 1836 cascade=self._match_text_seq("CASCADE"), 1837 constraints=self._match_text_seq("CONSTRAINTS"), 1838 purge=self._match_text_seq("PURGE"), 1839 cluster=cluster, 1840 concurrently=concurrently, 1841 ) 1842 1843 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1844 return ( 1845 self._match_text_seq("IF") 1846 and (not not_ or self._match(TokenType.NOT)) 1847 and self._match(TokenType.EXISTS) 1848 ) 1849 1850 def _parse_create(self) -> exp.Create | exp.Command: 1851 # Note: this can't be None because we've matched a statement parser 1852 start = self._prev 1853 1854 replace = ( 1855 start.token_type == TokenType.REPLACE 1856 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1857 or self._match_pair(TokenType.OR, TokenType.ALTER) 1858 ) 1859 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1860 1861 unique = self._match(TokenType.UNIQUE) 1862 1863 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1864 clustered = True 1865 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1866 "COLUMNSTORE" 1867 ): 1868 clustered = False 1869 else: 1870 clustered = None 1871 1872 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1873 self._advance() 1874 1875 properties = None 1876 create_token = self._match_set(self.CREATABLES) and self._prev 1877 1878 if not create_token: 1879 # exp.Properties.Location.POST_CREATE 1880 properties = self._parse_properties() 1881 create_token = self._match_set(self.CREATABLES) and self._prev 1882 1883 if not properties or not create_token: 1884 return self._parse_as_command(start) 1885 1886 concurrently = self._match_text_seq("CONCURRENTLY") 1887 exists = self._parse_exists(not_=True) 1888 this = None 1889 expression: t.Optional[exp.Expression] = None 1890 indexes = None 1891 no_schema_binding = None 1892 begin = None 1893 end = None 1894 clone = None 1895 1896 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1897 nonlocal properties 1898 if properties and temp_props: 1899 properties.expressions.extend(temp_props.expressions) 1900 elif temp_props: 1901 properties = temp_props 1902 1903 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1904 this = self._parse_user_defined_function(kind=create_token.token_type) 1905 1906 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1907 
extend_props(self._parse_properties()) 1908 1909 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1910 extend_props(self._parse_properties()) 1911 1912 if not expression: 1913 if self._match(TokenType.COMMAND): 1914 expression = self._parse_as_command(self._prev) 1915 else: 1916 begin = self._match(TokenType.BEGIN) 1917 return_ = self._match_text_seq("RETURN") 1918 1919 if self._match(TokenType.STRING, advance=False): 1920 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1921 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1922 expression = self._parse_string() 1923 extend_props(self._parse_properties()) 1924 else: 1925 expression = self._parse_user_defined_function_expression() 1926 1927 end = self._match_text_seq("END") 1928 1929 if return_: 1930 expression = self.expression(exp.Return, this=expression) 1931 elif create_token.token_type == TokenType.INDEX: 1932 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c) 1933 if not self._match(TokenType.ON): 1934 index = self._parse_id_var() 1935 anonymous = False 1936 else: 1937 index = None 1938 anonymous = True 1939 1940 this = self._parse_index(index=index, anonymous=anonymous) 1941 elif create_token.token_type in self.DB_CREATABLES: 1942 table_parts = self._parse_table_parts( 1943 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1944 ) 1945 1946 # exp.Properties.Location.POST_NAME 1947 self._match(TokenType.COMMA) 1948 extend_props(self._parse_properties(before=True)) 1949 1950 this = self._parse_schema(this=table_parts) 1951 1952 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1953 extend_props(self._parse_properties()) 1954 1955 self._match(TokenType.ALIAS) 1956 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1957 # exp.Properties.Location.POST_ALIAS 1958 extend_props(self._parse_properties()) 1959 1960 if create_token.token_type == TokenType.SEQUENCE: 1961 expression = self._parse_types() 1962 extend_props(self._parse_properties()) 1963 else: 1964 expression = self._parse_ddl_select() 1965 1966 if create_token.token_type == TokenType.TABLE: 1967 # exp.Properties.Location.POST_EXPRESSION 1968 extend_props(self._parse_properties()) 1969 1970 indexes = [] 1971 while True: 1972 index = self._parse_index() 1973 1974 # exp.Properties.Location.POST_INDEX 1975 extend_props(self._parse_properties()) 1976 if not index: 1977 break 1978 else: 1979 self._match(TokenType.COMMA) 1980 indexes.append(index) 1981 elif create_token.token_type == TokenType.VIEW: 1982 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1983 no_schema_binding = True 1984 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 1985 extend_props(self._parse_properties()) 1986 1987 shallow = self._match_text_seq("SHALLOW") 1988 1989 if self._match_texts(self.CLONE_KEYWORDS): 1990 copy = self._prev.text.lower() == "copy" 1991 clone = self.expression( 1992 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1993 ) 1994 1995 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1996 return self._parse_as_command(start) 1997 1998 create_kind_text = create_token.text.upper() 1999 return self.expression( 2000 exp.Create, 2001 this=this, 2002 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2003 replace=replace, 2004 refresh=refresh, 2005 unique=unique, 2006 expression=expression, 
2007 exists=exists, 2008 properties=properties, 2009 indexes=indexes, 2010 no_schema_binding=no_schema_binding, 2011 begin=begin, 2012 end=end, 2013 clone=clone, 2014 concurrently=concurrently, 2015 clustered=clustered, 2016 ) 2017 2018 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2019 seq = exp.SequenceProperties() 2020 2021 options = [] 2022 index = self._index 2023 2024 while self._curr: 2025 self._match(TokenType.COMMA) 2026 if self._match_text_seq("INCREMENT"): 2027 self._match_text_seq("BY") 2028 self._match_text_seq("=") 2029 seq.set("increment", self._parse_term()) 2030 elif self._match_text_seq("MINVALUE"): 2031 seq.set("minvalue", self._parse_term()) 2032 elif self._match_text_seq("MAXVALUE"): 2033 seq.set("maxvalue", self._parse_term()) 2034 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2035 self._match_text_seq("=") 2036 seq.set("start", self._parse_term()) 2037 elif self._match_text_seq("CACHE"): 2038 # T-SQL allows empty CACHE which is initialized dynamically 2039 seq.set("cache", self._parse_number() or True) 2040 elif self._match_text_seq("OWNED", "BY"): 2041 # "OWNED BY NONE" is the default 2042 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2043 else: 2044 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2045 if opt: 2046 options.append(opt) 2047 else: 2048 break 2049 2050 seq.set("options", options if options else None) 2051 return None if self._index == index else seq 2052 2053 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2054 # only used for teradata currently 2055 self._match(TokenType.COMMA) 2056 2057 kwargs = { 2058 "no": self._match_text_seq("NO"), 2059 "dual": self._match_text_seq("DUAL"), 2060 "before": self._match_text_seq("BEFORE"), 2061 "default": self._match_text_seq("DEFAULT"), 2062 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2063 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2064 "after": self._match_text_seq("AFTER"), 2065 "minimum": self._match_texts(("MIN", "MINIMUM")), 2066 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2067 } 2068 2069 if self._match_texts(self.PROPERTY_PARSERS): 2070 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2071 try: 2072 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2073 except TypeError: 2074 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2075 2076 return None 2077 2078 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2079 return self._parse_wrapped_csv(self._parse_property) 2080 2081 def _parse_property(self) -> t.Optional[exp.Expression]: 2082 if self._match_texts(self.PROPERTY_PARSERS): 2083 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2084 2085 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2086 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2087 2088 if self._match_text_seq("COMPOUND", "SORTKEY"): 2089 return self._parse_sortkey(compound=True) 2090 2091 if self._match_text_seq("SQL", "SECURITY"): 2092 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2093 2094 index = self._index 2095 key = self._parse_column() 2096 2097 if not self._match(TokenType.EQ): 2098 self._retreat(index) 2099 return self._parse_sequence_properties() 2100 2101 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2102 if isinstance(key, exp.Column): 2103 key = 
key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2104 2105 value = self._parse_bitwise() or self._parse_var(any_token=True) 2106 2107 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2108 if isinstance(value, exp.Column): 2109 value = exp.var(value.name) 2110 2111 return self.expression(exp.Property, this=key, value=value) 2112 2113 def _parse_stored(self) -> exp.FileFormatProperty: 2114 self._match(TokenType.ALIAS) 2115 2116 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2117 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2118 2119 return self.expression( 2120 exp.FileFormatProperty, 2121 this=( 2122 self.expression( 2123 exp.InputOutputFormat, input_format=input_format, output_format=output_format 2124 ) 2125 if input_format or output_format 2126 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2127 ), 2128 ) 2129 2130 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2131 field = self._parse_field() 2132 if isinstance(field, exp.Identifier) and not field.quoted: 2133 field = exp.var(field) 2134 2135 return field 2136 2137 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2138 self._match(TokenType.EQ) 2139 self._match(TokenType.ALIAS) 2140 2141 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2142 2143 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2144 properties = [] 2145 while True: 2146 if before: 2147 prop = self._parse_property_before() 2148 else: 2149 prop = self._parse_property() 2150 if not prop: 2151 break 2152 for p in ensure_list(prop): 2153 properties.append(p) 2154 2155 if properties: 2156 return self.expression(exp.Properties, expressions=properties) 2157 2158 return None 2159 2160 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2161 return self.expression( 2162 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2163 ) 2164 2165 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2166 if self._match_texts(("DEFINER", "INVOKER")): 2167 security_specifier = self._prev.text.upper() 2168 return self.expression(exp.SecurityProperty, this=security_specifier) 2169 return None 2170 2171 def _parse_settings_property(self) -> exp.SettingsProperty: 2172 return self.expression( 2173 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2174 ) 2175 2176 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2177 if self._index >= 2: 2178 pre_volatile_token = self._tokens[self._index - 2] 2179 else: 2180 pre_volatile_token = None 2181 2182 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2183 return exp.VolatileProperty() 2184 2185 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2186 2187 def _parse_retention_period(self) -> exp.Var: 2188 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2189 number = self._parse_number() 2190 number_str = f"{number} " if number else "" 2191 unit = self._parse_var(any_token=True) 2192 return exp.var(f"{number_str}{unit}") 2193 2194 def _parse_system_versioning_property( 2195 self, with_: bool = False 2196 ) -> exp.WithSystemVersioningProperty: 2197 self._match(TokenType.EQ) 2198 prop = self.expression( 2199 exp.WithSystemVersioningProperty, 2200 **{ # type: ignore 2201 "on": 
True, 2202 "with": with_, 2203 }, 2204 ) 2205 2206 if self._match_text_seq("OFF"): 2207 prop.set("on", False) 2208 return prop 2209 2210 self._match(TokenType.ON) 2211 if self._match(TokenType.L_PAREN): 2212 while self._curr and not self._match(TokenType.R_PAREN): 2213 if self._match_text_seq("HISTORY_TABLE", "="): 2214 prop.set("this", self._parse_table_parts()) 2215 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2216 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2217 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2218 prop.set("retention_period", self._parse_retention_period()) 2219 2220 self._match(TokenType.COMMA) 2221 2222 return prop 2223 2224 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2225 self._match(TokenType.EQ) 2226 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2227 prop = self.expression(exp.DataDeletionProperty, on=on) 2228 2229 if self._match(TokenType.L_PAREN): 2230 while self._curr and not self._match(TokenType.R_PAREN): 2231 if self._match_text_seq("FILTER_COLUMN", "="): 2232 prop.set("filter_column", self._parse_column()) 2233 elif self._match_text_seq("RETENTION_PERIOD", "="): 2234 prop.set("retention_period", self._parse_retention_period()) 2235 2236 self._match(TokenType.COMMA) 2237 2238 return prop 2239 2240 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2241 kind = "HASH" 2242 expressions: t.Optional[t.List[exp.Expression]] = None 2243 if self._match_text_seq("BY", "HASH"): 2244 expressions = self._parse_wrapped_csv(self._parse_id_var) 2245 elif self._match_text_seq("BY", "RANDOM"): 2246 kind = "RANDOM" 2247 2248 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2249 buckets: t.Optional[exp.Expression] = None 2250 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2251 buckets = self._parse_number() 2252 2253 return self.expression( 2254 exp.DistributedByProperty, 2255 expressions=expressions, 2256 kind=kind, 2257 buckets=buckets, 2258 order=self._parse_order(), 2259 ) 2260 2261 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2262 self._match_text_seq("KEY") 2263 expressions = self._parse_wrapped_id_vars() 2264 return self.expression(expr_type, expressions=expressions) 2265 2266 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2267 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2268 prop = self._parse_system_versioning_property(with_=True) 2269 self._match_r_paren() 2270 return prop 2271 2272 if self._match(TokenType.L_PAREN, advance=False): 2273 return self._parse_wrapped_properties() 2274 2275 if self._match_text_seq("JOURNAL"): 2276 return self._parse_withjournaltable() 2277 2278 if self._match_texts(self.VIEW_ATTRIBUTES): 2279 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2280 2281 if self._match_text_seq("DATA"): 2282 return self._parse_withdata(no=False) 2283 elif self._match_text_seq("NO", "DATA"): 2284 return self._parse_withdata(no=True) 2285 2286 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2287 return self._parse_serde_properties(with_=True) 2288 2289 if self._match(TokenType.SCHEMA): 2290 return self.expression( 2291 exp.WithSchemaBindingProperty, 2292 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2293 ) 2294 2295 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2296 return self.expression( 2297 exp.WithProcedureOptions, 
expressions=self._parse_csv(self._parse_procedure_option) 2298 ) 2299 2300 if not self._next: 2301 return None 2302 2303 return self._parse_withisolatedloading() 2304 2305 def _parse_procedure_option(self) -> exp.Expression | None: 2306 if self._match_text_seq("EXECUTE", "AS"): 2307 return self.expression( 2308 exp.ExecuteAsProperty, 2309 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2310 or self._parse_string(), 2311 ) 2312 2313 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2314 2315 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2316 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2317 self._match(TokenType.EQ) 2318 2319 user = self._parse_id_var() 2320 self._match(TokenType.PARAMETER) 2321 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2322 2323 if not user or not host: 2324 return None 2325 2326 return exp.DefinerProperty(this=f"{user}@{host}") 2327 2328 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2329 self._match(TokenType.TABLE) 2330 self._match(TokenType.EQ) 2331 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2332 2333 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2334 return self.expression(exp.LogProperty, no=no) 2335 2336 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2337 return self.expression(exp.JournalProperty, **kwargs) 2338 2339 def _parse_checksum(self) -> exp.ChecksumProperty: 2340 self._match(TokenType.EQ) 2341 2342 on = None 2343 if self._match(TokenType.ON): 2344 on = True 2345 elif self._match_text_seq("OFF"): 2346 on = False 2347 2348 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2349 2350 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2351 return self.expression( 2352 exp.Cluster, 2353 expressions=( 2354 self._parse_wrapped_csv(self._parse_ordered) 2355 if wrapped 2356 else self._parse_csv(self._parse_ordered) 2357 ), 2358 ) 2359 2360 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2361 self._match_text_seq("BY") 2362 2363 self._match_l_paren() 2364 expressions = self._parse_csv(self._parse_column) 2365 self._match_r_paren() 2366 2367 if self._match_text_seq("SORTED", "BY"): 2368 self._match_l_paren() 2369 sorted_by = self._parse_csv(self._parse_ordered) 2370 self._match_r_paren() 2371 else: 2372 sorted_by = None 2373 2374 self._match(TokenType.INTO) 2375 buckets = self._parse_number() 2376 self._match_text_seq("BUCKETS") 2377 2378 return self.expression( 2379 exp.ClusteredByProperty, 2380 expressions=expressions, 2381 sorted_by=sorted_by, 2382 buckets=buckets, 2383 ) 2384 2385 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2386 if not self._match_text_seq("GRANTS"): 2387 self._retreat(self._index - 1) 2388 return None 2389 2390 return self.expression(exp.CopyGrantsProperty) 2391 2392 def _parse_freespace(self) -> exp.FreespaceProperty: 2393 self._match(TokenType.EQ) 2394 return self.expression( 2395 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2396 ) 2397 2398 def _parse_mergeblockratio( 2399 self, no: bool = False, default: bool = False 2400 ) -> exp.MergeBlockRatioProperty: 2401 if self._match(TokenType.EQ): 2402 return self.expression( 2403 exp.MergeBlockRatioProperty, 2404 this=self._parse_number(), 2405 percent=self._match(TokenType.PERCENT), 2406 ) 2407 2408 return self.expression(exp.MergeBlockRatioProperty, no=no, 
default=default) 2409 2410 def _parse_datablocksize( 2411 self, 2412 default: t.Optional[bool] = None, 2413 minimum: t.Optional[bool] = None, 2414 maximum: t.Optional[bool] = None, 2415 ) -> exp.DataBlocksizeProperty: 2416 self._match(TokenType.EQ) 2417 size = self._parse_number() 2418 2419 units = None 2420 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2421 units = self._prev.text 2422 2423 return self.expression( 2424 exp.DataBlocksizeProperty, 2425 size=size, 2426 units=units, 2427 default=default, 2428 minimum=minimum, 2429 maximum=maximum, 2430 ) 2431 2432 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2433 self._match(TokenType.EQ) 2434 always = self._match_text_seq("ALWAYS") 2435 manual = self._match_text_seq("MANUAL") 2436 never = self._match_text_seq("NEVER") 2437 default = self._match_text_seq("DEFAULT") 2438 2439 autotemp = None 2440 if self._match_text_seq("AUTOTEMP"): 2441 autotemp = self._parse_schema() 2442 2443 return self.expression( 2444 exp.BlockCompressionProperty, 2445 always=always, 2446 manual=manual, 2447 never=never, 2448 default=default, 2449 autotemp=autotemp, 2450 ) 2451 2452 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2453 index = self._index 2454 no = self._match_text_seq("NO") 2455 concurrent = self._match_text_seq("CONCURRENT") 2456 2457 if not self._match_text_seq("ISOLATED", "LOADING"): 2458 self._retreat(index) 2459 return None 2460 2461 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2462 return self.expression( 2463 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2464 ) 2465 2466 def _parse_locking(self) -> exp.LockingProperty: 2467 if self._match(TokenType.TABLE): 2468 kind = "TABLE" 2469 elif self._match(TokenType.VIEW): 2470 kind = "VIEW" 2471 elif self._match(TokenType.ROW): 2472 kind = "ROW" 2473 elif self._match_text_seq("DATABASE"): 2474 kind = "DATABASE" 2475 else: 2476 kind = None 2477 2478 if kind in ("DATABASE", "TABLE", "VIEW"): 2479 this = self._parse_table_parts() 2480 else: 2481 this = None 2482 2483 if self._match(TokenType.FOR): 2484 for_or_in = "FOR" 2485 elif self._match(TokenType.IN): 2486 for_or_in = "IN" 2487 else: 2488 for_or_in = None 2489 2490 if self._match_text_seq("ACCESS"): 2491 lock_type = "ACCESS" 2492 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2493 lock_type = "EXCLUSIVE" 2494 elif self._match_text_seq("SHARE"): 2495 lock_type = "SHARE" 2496 elif self._match_text_seq("READ"): 2497 lock_type = "READ" 2498 elif self._match_text_seq("WRITE"): 2499 lock_type = "WRITE" 2500 elif self._match_text_seq("CHECKSUM"): 2501 lock_type = "CHECKSUM" 2502 else: 2503 lock_type = None 2504 2505 override = self._match_text_seq("OVERRIDE") 2506 2507 return self.expression( 2508 exp.LockingProperty, 2509 this=this, 2510 kind=kind, 2511 for_or_in=for_or_in, 2512 lock_type=lock_type, 2513 override=override, 2514 ) 2515 2516 def _parse_partition_by(self) -> t.List[exp.Expression]: 2517 if self._match(TokenType.PARTITION_BY): 2518 return self._parse_csv(self._parse_assignment) 2519 return [] 2520 2521 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2522 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2523 if self._match_text_seq("MINVALUE"): 2524 return exp.var("MINVALUE") 2525 if self._match_text_seq("MAXVALUE"): 2526 return exp.var("MAXVALUE") 2527 return self._parse_bitwise() 2528 2529 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2530 expression = None 
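# Examples of the partition bound forms handled below, following Postgres syntax
# (illustrative comment; see the CREATE TABLE reference linked further down):
#   FOR VALUES IN (1, 2)                      -> "this" holds the wrapped list
#   FOR VALUES FROM (MINVALUE) TO (10)        -> "from_expressions" / "to_expressions"
#   FOR VALUES WITH (MODULUS 4, REMAINDER 0)  -> "this" (modulus) and "expression" (remainder)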
2531 from_expressions = None 2532 to_expressions = None 2533 2534 if self._match(TokenType.IN): 2535 this = self._parse_wrapped_csv(self._parse_bitwise) 2536 elif self._match(TokenType.FROM): 2537 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2538 self._match_text_seq("TO") 2539 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2540 elif self._match_text_seq("WITH", "(", "MODULUS"): 2541 this = self._parse_number() 2542 self._match_text_seq(",", "REMAINDER") 2543 expression = self._parse_number() 2544 self._match_r_paren() 2545 else: 2546 self.raise_error("Failed to parse partition bound spec.") 2547 2548 return self.expression( 2549 exp.PartitionBoundSpec, 2550 this=this, 2551 expression=expression, 2552 from_expressions=from_expressions, 2553 to_expressions=to_expressions, 2554 ) 2555 2556 # https://www.postgresql.org/docs/current/sql-createtable.html 2557 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2558 if not self._match_text_seq("OF"): 2559 self._retreat(self._index - 1) 2560 return None 2561 2562 this = self._parse_table(schema=True) 2563 2564 if self._match(TokenType.DEFAULT): 2565 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2566 elif self._match_text_seq("FOR", "VALUES"): 2567 expression = self._parse_partition_bound_spec() 2568 else: 2569 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2570 2571 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2572 2573 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2574 self._match(TokenType.EQ) 2575 return self.expression( 2576 exp.PartitionedByProperty, 2577 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2578 ) 2579 2580 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2581 if self._match_text_seq("AND", "STATISTICS"): 2582 statistics = True 2583 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2584 statistics = False 2585 else: 2586 statistics = None 2587 2588 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2589 2590 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2591 if self._match_text_seq("SQL"): 2592 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2593 return None 2594 2595 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2596 if self._match_text_seq("SQL", "DATA"): 2597 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2598 return None 2599 2600 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2601 if self._match_text_seq("PRIMARY", "INDEX"): 2602 return exp.NoPrimaryIndexProperty() 2603 if self._match_text_seq("SQL"): 2604 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2605 return None 2606 2607 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2608 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2609 return exp.OnCommitProperty() 2610 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2611 return exp.OnCommitProperty(delete=True) 2612 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2613 2614 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2615 if self._match_text_seq("SQL", "DATA"): 2616 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2617 return None 2618 2619 def _parse_distkey(self) -> exp.DistKeyProperty: 2620 return self.expression(exp.DistKeyProperty, 
this=self._parse_wrapped(self._parse_id_var)) 2621 2622 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2623 table = self._parse_table(schema=True) 2624 2625 options = [] 2626 while self._match_texts(("INCLUDING", "EXCLUDING")): 2627 this = self._prev.text.upper() 2628 2629 id_var = self._parse_id_var() 2630 if not id_var: 2631 return None 2632 2633 options.append( 2634 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2635 ) 2636 2637 return self.expression(exp.LikeProperty, this=table, expressions=options) 2638 2639 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2640 return self.expression( 2641 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2642 ) 2643 2644 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2645 self._match(TokenType.EQ) 2646 return self.expression( 2647 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2648 ) 2649 2650 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2651 self._match_text_seq("WITH", "CONNECTION") 2652 return self.expression( 2653 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2654 ) 2655 2656 def _parse_returns(self) -> exp.ReturnsProperty: 2657 value: t.Optional[exp.Expression] 2658 null = None 2659 is_table = self._match(TokenType.TABLE) 2660 2661 if is_table: 2662 if self._match(TokenType.LT): 2663 value = self.expression( 2664 exp.Schema, 2665 this="TABLE", 2666 expressions=self._parse_csv(self._parse_struct_types), 2667 ) 2668 if not self._match(TokenType.GT): 2669 self.raise_error("Expecting >") 2670 else: 2671 value = self._parse_schema(exp.var("TABLE")) 2672 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2673 null = True 2674 value = None 2675 else: 2676 value = self._parse_types() 2677 2678 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2679 2680 def _parse_describe(self) -> exp.Describe: 2681 kind = self._match_set(self.CREATABLES) and self._prev.text 2682 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2683 if self._match(TokenType.DOT): 2684 style = None 2685 self._retreat(self._index - 2) 2686 2687 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2688 2689 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2690 this = self._parse_statement() 2691 else: 2692 this = self._parse_table(schema=True) 2693 2694 properties = self._parse_properties() 2695 expressions = properties.expressions if properties else None 2696 partition = self._parse_partition() 2697 return self.expression( 2698 exp.Describe, 2699 this=this, 2700 style=style, 2701 kind=kind, 2702 expressions=expressions, 2703 partition=partition, 2704 format=format, 2705 ) 2706 2707 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2708 kind = self._prev.text.upper() 2709 expressions = [] 2710 2711 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2712 if self._match(TokenType.WHEN): 2713 expression = self._parse_disjunction() 2714 self._match(TokenType.THEN) 2715 else: 2716 expression = None 2717 2718 else_ = self._match(TokenType.ELSE) 2719 2720 if not self._match(TokenType.INTO): 2721 return None 2722 2723 return self.expression( 2724 exp.ConditionalInsert, 2725 this=self.expression( 2726 exp.Insert, 2727 this=self._parse_table(schema=True), 2728 
expression=self._parse_derived_table_values(), 2729 ), 2730 expression=expression, 2731 else_=else_, 2732 ) 2733 2734 expression = parse_conditional_insert() 2735 while expression is not None: 2736 expressions.append(expression) 2737 expression = parse_conditional_insert() 2738 2739 return self.expression( 2740 exp.MultitableInserts, 2741 kind=kind, 2742 comments=comments, 2743 expressions=expressions, 2744 source=self._parse_table(), 2745 ) 2746 2747 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2748 comments = [] 2749 hint = self._parse_hint() 2750 overwrite = self._match(TokenType.OVERWRITE) 2751 ignore = self._match(TokenType.IGNORE) 2752 local = self._match_text_seq("LOCAL") 2753 alternative = None 2754 is_function = None 2755 2756 if self._match_text_seq("DIRECTORY"): 2757 this: t.Optional[exp.Expression] = self.expression( 2758 exp.Directory, 2759 this=self._parse_var_or_string(), 2760 local=local, 2761 row_format=self._parse_row_format(match_row=True), 2762 ) 2763 else: 2764 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2765 comments += ensure_list(self._prev_comments) 2766 return self._parse_multitable_inserts(comments) 2767 2768 if self._match(TokenType.OR): 2769 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2770 2771 self._match(TokenType.INTO) 2772 comments += ensure_list(self._prev_comments) 2773 self._match(TokenType.TABLE) 2774 is_function = self._match(TokenType.FUNCTION) 2775 2776 this = ( 2777 self._parse_table(schema=True, parse_partition=True) 2778 if not is_function 2779 else self._parse_function() 2780 ) 2781 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2782 this.set("alias", self._parse_table_alias()) 2783 2784 returning = self._parse_returning() 2785 2786 return self.expression( 2787 exp.Insert, 2788 comments=comments, 2789 hint=hint, 2790 is_function=is_function, 2791 this=this, 2792 stored=self._match_text_seq("STORED") and self._parse_stored(), 2793 by_name=self._match_text_seq("BY", "NAME"), 2794 exists=self._parse_exists(), 2795 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2796 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2797 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2798 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2799 conflict=self._parse_on_conflict(), 2800 returning=returning or self._parse_returning(), 2801 overwrite=overwrite, 2802 alternative=alternative, 2803 ignore=ignore, 2804 source=self._match(TokenType.TABLE) and self._parse_table(), 2805 ) 2806 2807 def _parse_kill(self) -> exp.Kill: 2808 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2809 2810 return self.expression( 2811 exp.Kill, 2812 this=self._parse_primary(), 2813 kind=kind, 2814 ) 2815 2816 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2817 conflict = self._match_text_seq("ON", "CONFLICT") 2818 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2819 2820 if not conflict and not duplicate: 2821 return None 2822 2823 conflict_keys = None 2824 constraint = None 2825 2826 if conflict: 2827 if self._match_text_seq("ON", "CONSTRAINT"): 2828 constraint = self._parse_id_var() 2829 elif self._match(TokenType.L_PAREN): 2830 conflict_keys = self._parse_csv(self._parse_id_var) 2831 self._match_r_paren() 2832 2833 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2834 if 
self._prev.token_type == TokenType.UPDATE: 2835 self._match(TokenType.SET) 2836 expressions = self._parse_csv(self._parse_equality) 2837 else: 2838 expressions = None 2839 2840 return self.expression( 2841 exp.OnConflict, 2842 duplicate=duplicate, 2843 expressions=expressions, 2844 action=action, 2845 conflict_keys=conflict_keys, 2846 constraint=constraint, 2847 where=self._parse_where(), 2848 ) 2849 2850 def _parse_returning(self) -> t.Optional[exp.Returning]: 2851 if not self._match(TokenType.RETURNING): 2852 return None 2853 return self.expression( 2854 exp.Returning, 2855 expressions=self._parse_csv(self._parse_expression), 2856 into=self._match(TokenType.INTO) and self._parse_table_part(), 2857 ) 2858 2859 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2860 if not self._match(TokenType.FORMAT): 2861 return None 2862 return self._parse_row_format() 2863 2864 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2865 index = self._index 2866 with_ = with_ or self._match_text_seq("WITH") 2867 2868 if not self._match(TokenType.SERDE_PROPERTIES): 2869 self._retreat(index) 2870 return None 2871 return self.expression( 2872 exp.SerdeProperties, 2873 **{ # type: ignore 2874 "expressions": self._parse_wrapped_properties(), 2875 "with": with_, 2876 }, 2877 ) 2878 2879 def _parse_row_format( 2880 self, match_row: bool = False 2881 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2882 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2883 return None 2884 2885 if self._match_text_seq("SERDE"): 2886 this = self._parse_string() 2887 2888 serde_properties = self._parse_serde_properties() 2889 2890 return self.expression( 2891 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2892 ) 2893 2894 self._match_text_seq("DELIMITED") 2895 2896 kwargs = {} 2897 2898 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2899 kwargs["fields"] = self._parse_string() 2900 if self._match_text_seq("ESCAPED", "BY"): 2901 kwargs["escaped"] = self._parse_string() 2902 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2903 kwargs["collection_items"] = self._parse_string() 2904 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2905 kwargs["map_keys"] = self._parse_string() 2906 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2907 kwargs["lines"] = self._parse_string() 2908 if self._match_text_seq("NULL", "DEFINED", "AS"): 2909 kwargs["null"] = self._parse_string() 2910 2911 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2912 2913 def _parse_load(self) -> exp.LoadData | exp.Command: 2914 if self._match_text_seq("DATA"): 2915 local = self._match_text_seq("LOCAL") 2916 self._match_text_seq("INPATH") 2917 inpath = self._parse_string() 2918 overwrite = self._match(TokenType.OVERWRITE) 2919 self._match_pair(TokenType.INTO, TokenType.TABLE) 2920 2921 return self.expression( 2922 exp.LoadData, 2923 this=self._parse_table(schema=True), 2924 local=local, 2925 overwrite=overwrite, 2926 inpath=inpath, 2927 partition=self._parse_partition(), 2928 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2929 serde=self._match_text_seq("SERDE") and self._parse_string(), 2930 ) 2931 return self._parse_as_command(self._prev) 2932 2933 def _parse_delete(self) -> exp.Delete: 2934 # This handles MySQL's "Multiple-Table Syntax" 2935 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2936 tables 
= None 2937 if not self._match(TokenType.FROM, advance=False): 2938 tables = self._parse_csv(self._parse_table) or None 2939 2940 returning = self._parse_returning() 2941 2942 return self.expression( 2943 exp.Delete, 2944 tables=tables, 2945 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2946 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2947 cluster=self._match(TokenType.ON) and self._parse_on_property(), 2948 where=self._parse_where(), 2949 returning=returning or self._parse_returning(), 2950 limit=self._parse_limit(), 2951 ) 2952 2953 def _parse_update(self) -> exp.Update: 2954 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2955 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2956 returning = self._parse_returning() 2957 return self.expression( 2958 exp.Update, 2959 **{ # type: ignore 2960 "this": this, 2961 "expressions": expressions, 2962 "from": self._parse_from(joins=True), 2963 "where": self._parse_where(), 2964 "returning": returning or self._parse_returning(), 2965 "order": self._parse_order(), 2966 "limit": self._parse_limit(), 2967 }, 2968 ) 2969 2970 def _parse_uncache(self) -> exp.Uncache: 2971 if not self._match(TokenType.TABLE): 2972 self.raise_error("Expecting TABLE after UNCACHE") 2973 2974 return self.expression( 2975 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2976 ) 2977 2978 def _parse_cache(self) -> exp.Cache: 2979 lazy = self._match_text_seq("LAZY") 2980 self._match(TokenType.TABLE) 2981 table = self._parse_table(schema=True) 2982 2983 options = [] 2984 if self._match_text_seq("OPTIONS"): 2985 self._match_l_paren() 2986 k = self._parse_string() 2987 self._match(TokenType.EQ) 2988 v = self._parse_string() 2989 options = [k, v] 2990 self._match_r_paren() 2991 2992 self._match(TokenType.ALIAS) 2993 return self.expression( 2994 exp.Cache, 2995 this=table, 2996 lazy=lazy, 2997 options=options, 2998 expression=self._parse_select(nested=True), 2999 ) 3000 3001 def _parse_partition(self) -> t.Optional[exp.Partition]: 3002 if not self._match_texts(self.PARTITION_KEYWORDS): 3003 return None 3004 3005 return self.expression( 3006 exp.Partition, 3007 subpartition=self._prev.text.upper() == "SUBPARTITION", 3008 expressions=self._parse_wrapped_csv(self._parse_assignment), 3009 ) 3010 3011 def _parse_value(self) -> t.Optional[exp.Tuple]: 3012 def _parse_value_expression() -> t.Optional[exp.Expression]: 3013 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3014 return exp.var(self._prev.text.upper()) 3015 return self._parse_expression() 3016 3017 if self._match(TokenType.L_PAREN): 3018 expressions = self._parse_csv(_parse_value_expression) 3019 self._match_r_paren() 3020 return self.expression(exp.Tuple, expressions=expressions) 3021 3022 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
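# For example, a bare "VALUES 1, 2" yields two single-column rows: each call to this
# helper wraps one bare expression in its own exp.Tuple, and the comma-separated rows
# themselves are consumed by the caller (illustrative note; see _parse_derived_table_values).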
3023 expression = self._parse_expression() 3024 if expression: 3025 return self.expression(exp.Tuple, expressions=[expression]) 3026 return None 3027 3028 def _parse_projections(self) -> t.List[exp.Expression]: 3029 return self._parse_expressions() 3030 3031 def _parse_select( 3032 self, 3033 nested: bool = False, 3034 table: bool = False, 3035 parse_subquery_alias: bool = True, 3036 parse_set_operation: bool = True, 3037 ) -> t.Optional[exp.Expression]: 3038 cte = self._parse_with() 3039 3040 if cte: 3041 this = self._parse_statement() 3042 3043 if not this: 3044 self.raise_error("Failed to parse any statement following CTE") 3045 return cte 3046 3047 if "with" in this.arg_types: 3048 this.set("with", cte) 3049 else: 3050 self.raise_error(f"{this.key} does not support CTE") 3051 this = cte 3052 3053 return this 3054 3055 # duckdb supports leading with FROM x 3056 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 3057 3058 if self._match(TokenType.SELECT): 3059 comments = self._prev_comments 3060 3061 hint = self._parse_hint() 3062 3063 if self._next and not self._next.token_type == TokenType.DOT: 3064 all_ = self._match(TokenType.ALL) 3065 distinct = self._match_set(self.DISTINCT_TOKENS) 3066 else: 3067 all_, distinct = None, None 3068 3069 kind = ( 3070 self._match(TokenType.ALIAS) 3071 and self._match_texts(("STRUCT", "VALUE")) 3072 and self._prev.text.upper() 3073 ) 3074 3075 if distinct: 3076 distinct = self.expression( 3077 exp.Distinct, 3078 on=self._parse_value() if self._match(TokenType.ON) else None, 3079 ) 3080 3081 if all_ and distinct: 3082 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3083 3084 operation_modifiers = [] 3085 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3086 operation_modifiers.append(exp.var(self._prev.text.upper())) 3087 3088 limit = self._parse_limit(top=True) 3089 projections = self._parse_projections() 3090 3091 this = self.expression( 3092 exp.Select, 3093 kind=kind, 3094 hint=hint, 3095 distinct=distinct, 3096 expressions=projections, 3097 limit=limit, 3098 operation_modifiers=operation_modifiers or None, 3099 ) 3100 this.comments = comments 3101 3102 into = self._parse_into() 3103 if into: 3104 this.set("into", into) 3105 3106 if not from_: 3107 from_ = self._parse_from() 3108 3109 if from_: 3110 this.set("from", from_) 3111 3112 this = self._parse_query_modifiers(this) 3113 elif (table or nested) and self._match(TokenType.L_PAREN): 3114 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3115 this = self._parse_simplified_pivot( 3116 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3117 ) 3118 elif self._match(TokenType.FROM): 3119 from_ = self._parse_from(skip_from_token=True) 3120 # Support parentheses for duckdb FROM-first syntax 3121 select = self._parse_select() 3122 if select: 3123 select.set("from", from_) 3124 this = select 3125 else: 3126 this = exp.select("*").from_(t.cast(exp.From, from_)) 3127 else: 3128 this = ( 3129 self._parse_table() 3130 if table 3131 else self._parse_select(nested=True, parse_set_operation=False) 3132 ) 3133 3134 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3135 # in case a modifier (e.g. 
join) is following 3136 if table and isinstance(this, exp.Values) and this.alias: 3137 alias = this.args["alias"].pop() 3138 this = exp.Table(this=this, alias=alias) 3139 3140 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3141 3142 self._match_r_paren() 3143 3144 # We return early here so that the UNION isn't attached to the subquery by the 3145 # following call to _parse_set_operations, but instead becomes the parent node 3146 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3147 elif self._match(TokenType.VALUES, advance=False): 3148 this = self._parse_derived_table_values() 3149 elif from_: 3150 this = exp.select("*").from_(from_.this, copy=False) 3151 elif self._match(TokenType.SUMMARIZE): 3152 table = self._match(TokenType.TABLE) 3153 this = self._parse_select() or self._parse_string() or self._parse_table() 3154 return self.expression(exp.Summarize, this=this, table=table) 3155 elif self._match(TokenType.DESCRIBE): 3156 this = self._parse_describe() 3157 elif self._match_text_seq("STREAM"): 3158 this = self._parse_function() 3159 if this: 3160 this = self.expression(exp.Stream, this=this) 3161 else: 3162 self._retreat(self._index - 1) 3163 else: 3164 this = None 3165 3166 return self._parse_set_operations(this) if parse_set_operation else this 3167 3168 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3169 if not skip_with_token and not self._match(TokenType.WITH): 3170 return None 3171 3172 comments = self._prev_comments 3173 recursive = self._match(TokenType.RECURSIVE) 3174 3175 last_comments = None 3176 expressions = [] 3177 while True: 3178 cte = self._parse_cte() 3179 if isinstance(cte, exp.CTE): 3180 expressions.append(cte) 3181 if last_comments: 3182 cte.add_comments(last_comments) 3183 3184 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3185 break 3186 else: 3187 self._match(TokenType.WITH) 3188 3189 last_comments = self._prev_comments 3190 3191 return self.expression( 3192 exp.With, comments=comments, expressions=expressions, recursive=recursive 3193 ) 3194 3195 def _parse_cte(self) -> t.Optional[exp.CTE]: 3196 index = self._index 3197 3198 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3199 if not alias or not alias.this: 3200 self.raise_error("Expected CTE to have alias") 3201 3202 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3203 self._retreat(index) 3204 return None 3205 3206 comments = self._prev_comments 3207 3208 if self._match_text_seq("NOT", "MATERIALIZED"): 3209 materialized = False 3210 elif self._match_text_seq("MATERIALIZED"): 3211 materialized = True 3212 else: 3213 materialized = None 3214 3215 cte = self.expression( 3216 exp.CTE, 3217 this=self._parse_wrapped(self._parse_statement), 3218 alias=alias, 3219 materialized=materialized, 3220 comments=comments, 3221 ) 3222 3223 if isinstance(cte.this, exp.Values): 3224 cte.set("this", exp.select("*").from_(exp.alias_(cte.this, "_values", table=True))) 3225 3226 return cte 3227 3228 def _parse_table_alias( 3229 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3230 ) -> t.Optional[exp.TableAlias]: 3231 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3232 # so this section tries to parse the clause version and if it fails, it treats the token 3233 # as an identifier (alias) 3234 if self._can_parse_limit_or_offset(): 3235 return None 3236 3237 any_token = self._match(TokenType.ALIAS) 3238 alias = ( 3239 
self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3240 or self._parse_string_as_identifier() 3241 ) 3242 3243 index = self._index 3244 if self._match(TokenType.L_PAREN): 3245 columns = self._parse_csv(self._parse_function_parameter) 3246 self._match_r_paren() if columns else self._retreat(index) 3247 else: 3248 columns = None 3249 3250 if not alias and not columns: 3251 return None 3252 3253 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3254 3255 # We bubble up comments from the Identifier to the TableAlias 3256 if isinstance(alias, exp.Identifier): 3257 table_alias.add_comments(alias.pop_comments()) 3258 3259 return table_alias 3260 3261 def _parse_subquery( 3262 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3263 ) -> t.Optional[exp.Subquery]: 3264 if not this: 3265 return None 3266 3267 return self.expression( 3268 exp.Subquery, 3269 this=this, 3270 pivots=self._parse_pivots(), 3271 alias=self._parse_table_alias() if parse_alias else None, 3272 sample=self._parse_table_sample(), 3273 ) 3274 3275 def _implicit_unnests_to_explicit(self, this: E) -> E: 3276 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3277 3278 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3279 for i, join in enumerate(this.args.get("joins") or []): 3280 table = join.this 3281 normalized_table = table.copy() 3282 normalized_table.meta["maybe_column"] = True 3283 normalized_table = _norm(normalized_table, dialect=self.dialect) 3284 3285 if isinstance(table, exp.Table) and not join.args.get("on"): 3286 if normalized_table.parts[0].name in refs: 3287 table_as_column = table.to_column() 3288 unnest = exp.Unnest(expressions=[table_as_column]) 3289 3290 # Table.to_column creates a parent Alias node that we want to convert to 3291 # a TableAlias and attach to the Unnest, so it matches the parser's output 3292 if isinstance(table.args.get("alias"), exp.TableAlias): 3293 table_as_column.replace(table_as_column.this) 3294 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3295 3296 table.replace(unnest) 3297 3298 refs.add(normalized_table.alias_or_name) 3299 3300 return this 3301 3302 def _parse_query_modifiers( 3303 self, this: t.Optional[exp.Expression] 3304 ) -> t.Optional[exp.Expression]: 3305 if isinstance(this, (exp.Query, exp.Table)): 3306 for join in self._parse_joins(): 3307 this.append("joins", join) 3308 for lateral in iter(self._parse_lateral, None): 3309 this.append("laterals", lateral) 3310 3311 while True: 3312 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3313 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3314 key, expression = parser(self) 3315 3316 if expression: 3317 this.set(key, expression) 3318 if key == "limit": 3319 offset = expression.args.pop("offset", None) 3320 3321 if offset: 3322 offset = exp.Offset(expression=offset) 3323 this.set("offset", offset) 3324 3325 limit_by_expressions = expression.expressions 3326 expression.set("expressions", None) 3327 offset.set("expressions", limit_by_expressions) 3328 continue 3329 break 3330 3331 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3332 this = self._implicit_unnests_to_explicit(this) 3333 3334 return this 3335 3336 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3337 start = self._curr 3338 while self._curr: 3339 self._advance() 3340 3341 end = self._tokens[self._index - 1] 3342 return 
exp.Hint(expressions=[self._find_sql(start, end)]) 3343 3344 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3345 return self._parse_function_call() 3346 3347 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3348 start_index = self._index 3349 should_fallback_to_string = False 3350 3351 hints = [] 3352 try: 3353 for hint in iter( 3354 lambda: self._parse_csv( 3355 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3356 ), 3357 [], 3358 ): 3359 hints.extend(hint) 3360 except ParseError: 3361 should_fallback_to_string = True 3362 3363 if should_fallback_to_string or self._curr: 3364 self._retreat(start_index) 3365 return self._parse_hint_fallback_to_string() 3366 3367 return self.expression(exp.Hint, expressions=hints) 3368 3369 def _parse_hint(self) -> t.Optional[exp.Hint]: 3370 if self._match(TokenType.HINT) and self._prev_comments: 3371 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3372 3373 return None 3374 3375 def _parse_into(self) -> t.Optional[exp.Into]: 3376 if not self._match(TokenType.INTO): 3377 return None 3378 3379 temp = self._match(TokenType.TEMPORARY) 3380 unlogged = self._match_text_seq("UNLOGGED") 3381 self._match(TokenType.TABLE) 3382 3383 return self.expression( 3384 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3385 ) 3386 3387 def _parse_from( 3388 self, joins: bool = False, skip_from_token: bool = False 3389 ) -> t.Optional[exp.From]: 3390 if not skip_from_token and not self._match(TokenType.FROM): 3391 return None 3392 3393 return self.expression( 3394 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3395 ) 3396 3397 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3398 return self.expression( 3399 exp.MatchRecognizeMeasure, 3400 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3401 this=self._parse_expression(), 3402 ) 3403 3404 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3405 if not self._match(TokenType.MATCH_RECOGNIZE): 3406 return None 3407 3408 self._match_l_paren() 3409 3410 partition = self._parse_partition_by() 3411 order = self._parse_order() 3412 3413 measures = ( 3414 self._parse_csv(self._parse_match_recognize_measure) 3415 if self._match_text_seq("MEASURES") 3416 else None 3417 ) 3418 3419 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3420 rows = exp.var("ONE ROW PER MATCH") 3421 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3422 text = "ALL ROWS PER MATCH" 3423 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3424 text += " SHOW EMPTY MATCHES" 3425 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3426 text += " OMIT EMPTY MATCHES" 3427 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3428 text += " WITH UNMATCHED ROWS" 3429 rows = exp.var(text) 3430 else: 3431 rows = None 3432 3433 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3434 text = "AFTER MATCH SKIP" 3435 if self._match_text_seq("PAST", "LAST", "ROW"): 3436 text += " PAST LAST ROW" 3437 elif self._match_text_seq("TO", "NEXT", "ROW"): 3438 text += " TO NEXT ROW" 3439 elif self._match_text_seq("TO", "FIRST"): 3440 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3441 elif self._match_text_seq("TO", "LAST"): 3442 text += f" TO LAST {self._advance_any().text}" # type: ignore 3443 after = exp.var(text) 3444 else: 3445 after = None 3446 3447 if self._match_text_seq("PATTERN"): 3448 self._match_l_paren() 3449 3450 if not 
self._curr: 3451 self.raise_error("Expecting )", self._curr) 3452 3453 paren = 1 3454 start = self._curr 3455 3456 while self._curr and paren > 0: 3457 if self._curr.token_type == TokenType.L_PAREN: 3458 paren += 1 3459 if self._curr.token_type == TokenType.R_PAREN: 3460 paren -= 1 3461 3462 end = self._prev 3463 self._advance() 3464 3465 if paren > 0: 3466 self.raise_error("Expecting )", self._curr) 3467 3468 pattern = exp.var(self._find_sql(start, end)) 3469 else: 3470 pattern = None 3471 3472 define = ( 3473 self._parse_csv(self._parse_name_as_expression) 3474 if self._match_text_seq("DEFINE") 3475 else None 3476 ) 3477 3478 self._match_r_paren() 3479 3480 return self.expression( 3481 exp.MatchRecognize, 3482 partition_by=partition, 3483 order=order, 3484 measures=measures, 3485 rows=rows, 3486 after=after, 3487 pattern=pattern, 3488 define=define, 3489 alias=self._parse_table_alias(), 3490 ) 3491 3492 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3493 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3494 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3495 cross_apply = False 3496 3497 if cross_apply is not None: 3498 this = self._parse_select(table=True) 3499 view = None 3500 outer = None 3501 elif self._match(TokenType.LATERAL): 3502 this = self._parse_select(table=True) 3503 view = self._match(TokenType.VIEW) 3504 outer = self._match(TokenType.OUTER) 3505 else: 3506 return None 3507 3508 if not this: 3509 this = ( 3510 self._parse_unnest() 3511 or self._parse_function() 3512 or self._parse_id_var(any_token=False) 3513 ) 3514 3515 while self._match(TokenType.DOT): 3516 this = exp.Dot( 3517 this=this, 3518 expression=self._parse_function() or self._parse_id_var(any_token=False), 3519 ) 3520 3521 if view: 3522 table = self._parse_id_var(any_token=False) 3523 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3524 table_alias: t.Optional[exp.TableAlias] = self.expression( 3525 exp.TableAlias, this=table, columns=columns 3526 ) 3527 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3528 # We move the alias from the lateral's child node to the lateral itself 3529 table_alias = this.args["alias"].pop() 3530 else: 3531 table_alias = self._parse_table_alias() 3532 3533 return self.expression( 3534 exp.Lateral, 3535 this=this, 3536 view=view, 3537 outer=outer, 3538 alias=table_alias, 3539 cross_apply=cross_apply, 3540 ) 3541 3542 def _parse_join_parts( 3543 self, 3544 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3545 return ( 3546 self._match_set(self.JOIN_METHODS) and self._prev, 3547 self._match_set(self.JOIN_SIDES) and self._prev, 3548 self._match_set(self.JOIN_KINDS) and self._prev, 3549 ) 3550 3551 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3552 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3553 this = self._parse_column() 3554 if isinstance(this, exp.Column): 3555 return this.this 3556 return this 3557 3558 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3559 3560 def _parse_join( 3561 self, skip_join_token: bool = False, parse_bracket: bool = False 3562 ) -> t.Optional[exp.Join]: 3563 if self._match(TokenType.COMMA): 3564 return self.expression(exp.Join, this=self._parse_table()) 3565 3566 index = self._index 3567 method, side, kind = self._parse_join_parts() 3568 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3569 join = self._match(TokenType.JOIN) or (kind and 
kind.token_type == TokenType.STRAIGHT_JOIN) 3570 3571 if not skip_join_token and not join: 3572 self._retreat(index) 3573 kind = None 3574 method = None 3575 side = None 3576 3577 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3578 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3579 3580 if not skip_join_token and not join and not outer_apply and not cross_apply: 3581 return None 3582 3583 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3584 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3585 kwargs["expressions"] = self._parse_csv( 3586 lambda: self._parse_table(parse_bracket=parse_bracket) 3587 ) 3588 3589 if method: 3590 kwargs["method"] = method.text 3591 if side: 3592 kwargs["side"] = side.text 3593 if kind: 3594 kwargs["kind"] = kind.text 3595 if hint: 3596 kwargs["hint"] = hint 3597 3598 if self._match(TokenType.MATCH_CONDITION): 3599 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3600 3601 if self._match(TokenType.ON): 3602 kwargs["on"] = self._parse_assignment() 3603 elif self._match(TokenType.USING): 3604 kwargs["using"] = self._parse_using_identifiers() 3605 elif ( 3606 not (outer_apply or cross_apply) 3607 and not isinstance(kwargs["this"], exp.Unnest) 3608 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3609 ): 3610 index = self._index 3611 joins: t.Optional[list] = list(self._parse_joins()) 3612 3613 if joins and self._match(TokenType.ON): 3614 kwargs["on"] = self._parse_assignment() 3615 elif joins and self._match(TokenType.USING): 3616 kwargs["using"] = self._parse_using_identifiers() 3617 else: 3618 joins = None 3619 self._retreat(index) 3620 3621 kwargs["this"].set("joins", joins if joins else None) 3622 3623 comments = [c for token in (method, side, kind) if token for c in token.comments] 3624 return self.expression(exp.Join, comments=comments, **kwargs) 3625 3626 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3627 this = self._parse_assignment() 3628 3629 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3630 return this 3631 3632 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3633 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3634 3635 return this 3636 3637 def _parse_index_params(self) -> exp.IndexParameters: 3638 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3639 3640 if self._match(TokenType.L_PAREN, advance=False): 3641 columns = self._parse_wrapped_csv(self._parse_with_operator) 3642 else: 3643 columns = None 3644 3645 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3646 partition_by = self._parse_partition_by() 3647 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3648 tablespace = ( 3649 self._parse_var(any_token=True) 3650 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3651 else None 3652 ) 3653 where = self._parse_where() 3654 3655 on = self._parse_field() if self._match(TokenType.ON) else None 3656 3657 return self.expression( 3658 exp.IndexParameters, 3659 using=using, 3660 columns=columns, 3661 include=include, 3662 partition_by=partition_by, 3663 where=where, 3664 with_storage=with_storage, 3665 tablespace=tablespace, 3666 on=on, 3667 ) 3668 3669 def _parse_index( 3670 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3671 ) -> t.Optional[exp.Index]: 3672 if index or 
anonymous: 3673 unique = None 3674 primary = None 3675 amp = None 3676 3677 self._match(TokenType.ON) 3678 self._match(TokenType.TABLE) # hive 3679 table = self._parse_table_parts(schema=True) 3680 else: 3681 unique = self._match(TokenType.UNIQUE) 3682 primary = self._match_text_seq("PRIMARY") 3683 amp = self._match_text_seq("AMP") 3684 3685 if not self._match(TokenType.INDEX): 3686 return None 3687 3688 index = self._parse_id_var() 3689 table = None 3690 3691 params = self._parse_index_params() 3692 3693 return self.expression( 3694 exp.Index, 3695 this=index, 3696 table=table, 3697 unique=unique, 3698 primary=primary, 3699 amp=amp, 3700 params=params, 3701 ) 3702 3703 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3704 hints: t.List[exp.Expression] = [] 3705 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3706 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3707 hints.append( 3708 self.expression( 3709 exp.WithTableHint, 3710 expressions=self._parse_csv( 3711 lambda: self._parse_function() or self._parse_var(any_token=True) 3712 ), 3713 ) 3714 ) 3715 self._match_r_paren() 3716 else: 3717 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3718 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3719 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3720 3721 self._match_set((TokenType.INDEX, TokenType.KEY)) 3722 if self._match(TokenType.FOR): 3723 hint.set("target", self._advance_any() and self._prev.text.upper()) 3724 3725 hint.set("expressions", self._parse_wrapped_id_vars()) 3726 hints.append(hint) 3727 3728 return hints or None 3729 3730 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3731 return ( 3732 (not schema and self._parse_function(optional_parens=False)) 3733 or self._parse_id_var(any_token=False) 3734 or self._parse_string_as_identifier() 3735 or self._parse_placeholder() 3736 ) 3737 3738 def _parse_table_parts( 3739 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3740 ) -> exp.Table: 3741 catalog = None 3742 db = None 3743 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3744 3745 while self._match(TokenType.DOT): 3746 if catalog: 3747 # This allows nesting the table in arbitrarily many dot expressions if needed 3748 table = self.expression( 3749 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3750 ) 3751 else: 3752 catalog = db 3753 db = table 3754 # "" used for tsql FROM a..b case 3755 table = self._parse_table_part(schema=schema) or "" 3756 3757 if ( 3758 wildcard 3759 and self._is_connected() 3760 and (isinstance(table, exp.Identifier) or not table) 3761 and self._match(TokenType.STAR) 3762 ): 3763 if isinstance(table, exp.Identifier): 3764 table.args["this"] += "*" 3765 else: 3766 table = exp.Identifier(this="*") 3767 3768 # We bubble up comments from the Identifier to the Table 3769 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3770 3771 if is_db_reference: 3772 catalog = db 3773 db = table 3774 table = None 3775 3776 if not table and not is_db_reference: 3777 self.raise_error(f"Expected table name but got {self._curr}") 3778 if not db and is_db_reference: 3779 self.raise_error(f"Expected database name but got {self._curr}") 3780 3781 table = self.expression( 3782 exp.Table, 3783 comments=comments, 3784 this=table, 3785 db=db, 3786 catalog=catalog, 3787 ) 3788 3789 changes = self._parse_changes() 3790 if changes: 3791 
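# Illustrative aside (not part of the sqlglot source): the dotted-name loop
# above shifts parts right-to-left, so a reference like c.d.t lands in an
# exp.Table whose this/db/catalog args hold the table, schema and catalog
# identifiers, respectively. A minimal sketch using the public API:
#
#     from sqlglot import exp, parse_one
#
#     table = parse_one("SELECT * FROM c.d.t").find(exp.Table)
#     assert (table.catalog, table.db, table.name) == ("c", "d", "t")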
table.set("changes", changes) 3792 3793 at_before = self._parse_historical_data() 3794 if at_before: 3795 table.set("when", at_before) 3796 3797 pivots = self._parse_pivots() 3798 if pivots: 3799 table.set("pivots", pivots) 3800 3801 return table 3802 3803 def _parse_table( 3804 self, 3805 schema: bool = False, 3806 joins: bool = False, 3807 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3808 parse_bracket: bool = False, 3809 is_db_reference: bool = False, 3810 parse_partition: bool = False, 3811 ) -> t.Optional[exp.Expression]: 3812 lateral = self._parse_lateral() 3813 if lateral: 3814 return lateral 3815 3816 unnest = self._parse_unnest() 3817 if unnest: 3818 return unnest 3819 3820 values = self._parse_derived_table_values() 3821 if values: 3822 return values 3823 3824 subquery = self._parse_select(table=True) 3825 if subquery: 3826 if not subquery.args.get("pivots"): 3827 subquery.set("pivots", self._parse_pivots()) 3828 return subquery 3829 3830 bracket = parse_bracket and self._parse_bracket(None) 3831 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3832 3833 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3834 self._parse_table 3835 ) 3836 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3837 3838 only = self._match(TokenType.ONLY) 3839 3840 this = t.cast( 3841 exp.Expression, 3842 bracket 3843 or rows_from 3844 or self._parse_bracket( 3845 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3846 ), 3847 ) 3848 3849 if only: 3850 this.set("only", only) 3851 3852 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3853 self._match_text_seq("*") 3854 3855 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3856 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3857 this.set("partition", self._parse_partition()) 3858 3859 if schema: 3860 return self._parse_schema(this=this) 3861 3862 version = self._parse_version() 3863 3864 if version: 3865 this.set("version", version) 3866 3867 if self.dialect.ALIAS_POST_TABLESAMPLE: 3868 this.set("sample", self._parse_table_sample()) 3869 3870 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3871 if alias: 3872 this.set("alias", alias) 3873 3874 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3875 return self.expression( 3876 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3877 ) 3878 3879 this.set("hints", self._parse_table_hints()) 3880 3881 if not this.args.get("pivots"): 3882 this.set("pivots", self._parse_pivots()) 3883 3884 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3885 this.set("sample", self._parse_table_sample()) 3886 3887 if joins: 3888 for join in self._parse_joins(): 3889 this.append("joins", join) 3890 3891 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3892 this.set("ordinality", True) 3893 this.set("alias", self._parse_table_alias()) 3894 3895 return this 3896 3897 def _parse_version(self) -> t.Optional[exp.Version]: 3898 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3899 this = "TIMESTAMP" 3900 elif self._match(TokenType.VERSION_SNAPSHOT): 3901 this = "VERSION" 3902 else: 3903 return None 3904 3905 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3906 kind = self._prev.text.upper() 3907 start = self._parse_bitwise() 3908 self._match_texts(("TO", "AND")) 3909 end = self._parse_bitwise() 3910 expression: t.Optional[exp.Expression] = 
self.expression( 3911 exp.Tuple, expressions=[start, end] 3912 ) 3913 elif self._match_text_seq("CONTAINED", "IN"): 3914 kind = "CONTAINED IN" 3915 expression = self.expression( 3916 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3917 ) 3918 elif self._match(TokenType.ALL): 3919 kind = "ALL" 3920 expression = None 3921 else: 3922 self._match_text_seq("AS", "OF") 3923 kind = "AS OF" 3924 expression = self._parse_type() 3925 3926 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3927 3928 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3929 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3930 index = self._index 3931 historical_data = None 3932 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3933 this = self._prev.text.upper() 3934 kind = ( 3935 self._match(TokenType.L_PAREN) 3936 and self._match_texts(self.HISTORICAL_DATA_KIND) 3937 and self._prev.text.upper() 3938 ) 3939 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3940 3941 if expression: 3942 self._match_r_paren() 3943 historical_data = self.expression( 3944 exp.HistoricalData, this=this, kind=kind, expression=expression 3945 ) 3946 else: 3947 self._retreat(index) 3948 3949 return historical_data 3950 3951 def _parse_changes(self) -> t.Optional[exp.Changes]: 3952 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 3953 return None 3954 3955 information = self._parse_var(any_token=True) 3956 self._match_r_paren() 3957 3958 return self.expression( 3959 exp.Changes, 3960 information=information, 3961 at_before=self._parse_historical_data(), 3962 end=self._parse_historical_data(), 3963 ) 3964 3965 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3966 if not self._match(TokenType.UNNEST): 3967 return None 3968 3969 expressions = self._parse_wrapped_csv(self._parse_equality) 3970 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3971 3972 alias = self._parse_table_alias() if with_alias else None 3973 3974 if alias: 3975 if self.dialect.UNNEST_COLUMN_ONLY: 3976 if alias.args.get("columns"): 3977 self.raise_error("Unexpected extra column alias in unnest.") 3978 3979 alias.set("columns", [alias.this]) 3980 alias.set("this", None) 3981 3982 columns = alias.args.get("columns") or [] 3983 if offset and len(expressions) < len(columns): 3984 offset = columns.pop() 3985 3986 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3987 self._match(TokenType.ALIAS) 3988 offset = self._parse_id_var( 3989 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3990 ) or exp.to_identifier("offset") 3991 3992 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3993 3994 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3995 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3996 if not is_derived and not ( 3997 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 3998 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 3999 ): 4000 return None 4001 4002 expressions = self._parse_csv(self._parse_value) 4003 alias = self._parse_table_alias() 4004 4005 if is_derived: 4006 self._match_r_paren() 4007 4008 return self.expression( 4009 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4010 ) 4011 4012 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4013 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4014 as_modifier 
and self._match_text_seq("USING", "SAMPLE") 4015 ): 4016 return None 4017 4018 bucket_numerator = None 4019 bucket_denominator = None 4020 bucket_field = None 4021 percent = None 4022 size = None 4023 seed = None 4024 4025 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4026 matched_l_paren = self._match(TokenType.L_PAREN) 4027 4028 if self.TABLESAMPLE_CSV: 4029 num = None 4030 expressions = self._parse_csv(self._parse_primary) 4031 else: 4032 expressions = None 4033 num = ( 4034 self._parse_factor() 4035 if self._match(TokenType.NUMBER, advance=False) 4036 else self._parse_primary() or self._parse_placeholder() 4037 ) 4038 4039 if self._match_text_seq("BUCKET"): 4040 bucket_numerator = self._parse_number() 4041 self._match_text_seq("OUT", "OF") 4042 bucket_denominator = self._parse_number() 4043 self._match(TokenType.ON) 4044 bucket_field = self._parse_field() 4045 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4046 percent = num 4047 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4048 size = num 4049 else: 4050 percent = num 4051 4052 if matched_l_paren: 4053 self._match_r_paren() 4054 4055 if self._match(TokenType.L_PAREN): 4056 method = self._parse_var(upper=True) 4057 seed = self._match(TokenType.COMMA) and self._parse_number() 4058 self._match_r_paren() 4059 elif self._match_texts(("SEED", "REPEATABLE")): 4060 seed = self._parse_wrapped(self._parse_number) 4061 4062 if not method and self.DEFAULT_SAMPLING_METHOD: 4063 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4064 4065 return self.expression( 4066 exp.TableSample, 4067 expressions=expressions, 4068 method=method, 4069 bucket_numerator=bucket_numerator, 4070 bucket_denominator=bucket_denominator, 4071 bucket_field=bucket_field, 4072 percent=percent, 4073 size=size, 4074 seed=seed, 4075 ) 4076 4077 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4078 return list(iter(self._parse_pivot, None)) or None 4079 4080 def _parse_joins(self) -> t.Iterator[exp.Join]: 4081 return iter(self._parse_join, None) 4082 4083 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4084 if not self._match(TokenType.INTO): 4085 return None 4086 4087 return self.expression( 4088 exp.UnpivotColumns, 4089 this=self._match_text_seq("NAME") and self._parse_column(), 4090 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4091 ) 4092 4093 # https://duckdb.org/docs/sql/statements/pivot 4094 def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4095 def _parse_on() -> t.Optional[exp.Expression]: 4096 this = self._parse_bitwise() 4097 4098 if self._match(TokenType.IN): 4099 # PIVOT ... ON col IN (row_val1, row_val2) 4100 return self._parse_in(this) 4101 if self._match(TokenType.ALIAS, advance=False): 4102 # UNPIVOT ...
ON (col1, col2, col3) AS row_val 4103 return self._parse_alias(this) 4104 4105 return this 4106 4107 this = self._parse_table() 4108 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4109 into = self._parse_unpivot_columns() 4110 using = self._match(TokenType.USING) and self._parse_csv( 4111 lambda: self._parse_alias(self._parse_function()) 4112 ) 4113 group = self._parse_group() 4114 4115 return self.expression( 4116 exp.Pivot, 4117 this=this, 4118 expressions=expressions, 4119 using=using, 4120 group=group, 4121 unpivot=is_unpivot, 4122 into=into, 4123 ) 4124 4125 def _parse_pivot_in(self) -> exp.In | exp.PivotAny: 4126 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4127 this = self._parse_select_or_expression() 4128 4129 self._match(TokenType.ALIAS) 4130 alias = self._parse_bitwise() 4131 if alias: 4132 if isinstance(alias, exp.Column) and not alias.db: 4133 alias = alias.this 4134 return self.expression(exp.PivotAlias, this=this, alias=alias) 4135 4136 return this 4137 4138 value = self._parse_column() 4139 4140 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4141 self.raise_error("Expecting IN (") 4142 4143 if self._match(TokenType.ANY): 4144 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4145 else: 4146 exprs = self._parse_csv(_parse_aliased_expression) 4147 4148 self._match_r_paren() 4149 return self.expression(exp.In, this=value, expressions=exprs) 4150 4151 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4152 index = self._index 4153 include_nulls = None 4154 4155 if self._match(TokenType.PIVOT): 4156 unpivot = False 4157 elif self._match(TokenType.UNPIVOT): 4158 unpivot = True 4159 4160 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4161 if self._match_text_seq("INCLUDE", "NULLS"): 4162 include_nulls = True 4163 elif self._match_text_seq("EXCLUDE", "NULLS"): 4164 include_nulls = False 4165 else: 4166 return None 4167 4168 expressions = [] 4169 4170 if not self._match(TokenType.L_PAREN): 4171 self._retreat(index) 4172 return None 4173 4174 if unpivot: 4175 expressions = self._parse_csv(self._parse_column) 4176 else: 4177 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 4178 4179 if not expressions: 4180 self.raise_error("Failed to parse PIVOT's aggregation list") 4181 4182 if not self._match(TokenType.FOR): 4183 self.raise_error("Expecting FOR") 4184 4185 field = self._parse_pivot_in() 4186 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4187 self._parse_bitwise 4188 ) 4189 4190 self._match_r_paren() 4191 4192 pivot = self.expression( 4193 exp.Pivot, 4194 expressions=expressions, 4195 field=field, 4196 unpivot=unpivot, 4197 include_nulls=include_nulls, 4198 default_on_null=default_on_null, 4199 ) 4200 4201 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4202 pivot.set("alias", self._parse_table_alias()) 4203 4204 if not unpivot: 4205 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4206 4207 columns: t.List[exp.Expression] = [] 4208 for fld in pivot.args["field"].expressions: 4209 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4210 for name in names: 4211 if self.PREFIXED_PIVOT_COLUMNS: 4212 name = f"{name}_{field_name}" if name else field_name 4213 else: 4214 name = f"{field_name}_{name}" if name else field_name 4215 4216 columns.append(exp.to_identifier(name)) 4217 4218 
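# Illustrative aside (not part of the sqlglot source): the loop above
# synthesizes the pivot's output column names by combining each IN-value with
# the aggregation aliases, honoring PREFIXED_PIVOT_COLUMNS and
# IDENTIFY_PIVOT_STRINGS. A minimal sketch, assuming Snowflake's PIVOT syntax:
#
#     from sqlglot import exp, parse_one
#
#     sql = "SELECT * FROM sales PIVOT (SUM(amount) FOR month IN ('jan', 'feb'))"
#     pivot = parse_one(sql, read="snowflake").find(exp.Pivot)
#     print(pivot.args.get("columns"))  # the synthesized column identifiers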
pivot.set("columns", columns) 4219 4220 return pivot 4221 4222 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4223 return [agg.alias for agg in aggregations] 4224 4225 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4226 if not skip_where_token and not self._match(TokenType.PREWHERE): 4227 return None 4228 4229 return self.expression( 4230 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4231 ) 4232 4233 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4234 if not skip_where_token and not self._match(TokenType.WHERE): 4235 return None 4236 4237 return self.expression( 4238 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4239 ) 4240 4241 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4242 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4243 return None 4244 4245 elements: t.Dict[str, t.Any] = defaultdict(list) 4246 4247 if self._match(TokenType.ALL): 4248 elements["all"] = True 4249 elif self._match(TokenType.DISTINCT): 4250 elements["all"] = False 4251 4252 while True: 4253 index = self._index 4254 4255 elements["expressions"].extend( 4256 self._parse_csv( 4257 lambda: None 4258 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4259 else self._parse_assignment() 4260 ) 4261 ) 4262 4263 before_with_index = self._index 4264 with_prefix = self._match(TokenType.WITH) 4265 4266 if self._match(TokenType.ROLLUP): 4267 elements["rollup"].append( 4268 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4269 ) 4270 elif self._match(TokenType.CUBE): 4271 elements["cube"].append( 4272 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4273 ) 4274 elif self._match(TokenType.GROUPING_SETS): 4275 elements["grouping_sets"].append( 4276 self.expression( 4277 exp.GroupingSets, 4278 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4279 ) 4280 ) 4281 elif self._match_text_seq("TOTALS"): 4282 elements["totals"] = True # type: ignore 4283 4284 if before_with_index <= self._index <= before_with_index + 1: 4285 self._retreat(before_with_index) 4286 break 4287 4288 if index == self._index: 4289 break 4290 4291 return self.expression(exp.Group, **elements) # type: ignore 4292 4293 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4294 return self.expression( 4295 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4296 ) 4297 4298 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4299 if self._match(TokenType.L_PAREN): 4300 grouping_set = self._parse_csv(self._parse_column) 4301 self._match_r_paren() 4302 return self.expression(exp.Tuple, expressions=grouping_set) 4303 4304 return self._parse_column() 4305 4306 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4307 if not skip_having_token and not self._match(TokenType.HAVING): 4308 return None 4309 return self.expression(exp.Having, this=self._parse_assignment()) 4310 4311 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4312 if not self._match(TokenType.QUALIFY): 4313 return None 4314 return self.expression(exp.Qualify, this=self._parse_assignment()) 4315 4316 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4317 if skip_start_token: 4318 start = None 4319 elif self._match(TokenType.START_WITH): 4320 start = self._parse_assignment() 4321 else: 4322 
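# Without a START WITH clause (and when not told to skip it), there is no
# hierarchical clause to parse, hence the early return below.
#
# Illustrative aside (not part of the sqlglot source): a minimal sketch of the
# AST this method produces, assuming Oracle's hierarchical query syntax:
#
#     from sqlglot import exp, parse_one
#
#     sql = """
#         SELECT employee_id FROM employees
#         START WITH manager_id IS NULL
#         CONNECT BY PRIOR employee_id = manager_id
#     """
#     connect = parse_one(sql, read="oracle").find(exp.Connect)
#     # PRIOR is parsed via the temporary NO_PAREN_FUNCTION_PARSERS entry below
#     assert connect.find(exp.Prior) is not None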
return None 4323 4324 self._match(TokenType.CONNECT_BY) 4325 nocycle = self._match_text_seq("NOCYCLE") 4326 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4327 exp.Prior, this=self._parse_bitwise() 4328 ) 4329 connect = self._parse_assignment() 4330 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4331 4332 if not start and self._match(TokenType.START_WITH): 4333 start = self._parse_assignment() 4334 4335 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4336 4337 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4338 this = self._parse_id_var(any_token=True) 4339 if self._match(TokenType.ALIAS): 4340 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4341 return this 4342 4343 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4344 if self._match_text_seq("INTERPOLATE"): 4345 return self._parse_wrapped_csv(self._parse_name_as_expression) 4346 return None 4347 4348 def _parse_order( 4349 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4350 ) -> t.Optional[exp.Expression]: 4351 siblings = None 4352 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4353 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4354 return this 4355 4356 siblings = True 4357 4358 return self.expression( 4359 exp.Order, 4360 this=this, 4361 expressions=self._parse_csv(self._parse_ordered), 4362 siblings=siblings, 4363 ) 4364 4365 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4366 if not self._match(token): 4367 return None 4368 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4369 4370 def _parse_ordered( 4371 self, parse_method: t.Optional[t.Callable] = None 4372 ) -> t.Optional[exp.Ordered]: 4373 this = parse_method() if parse_method else self._parse_assignment() 4374 if not this: 4375 return None 4376 4377 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4378 this = exp.var("ALL") 4379 4380 asc = self._match(TokenType.ASC) 4381 desc = self._match(TokenType.DESC) or (asc and False) 4382 4383 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4384 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4385 4386 nulls_first = is_nulls_first or False 4387 explicitly_null_ordered = is_nulls_first or is_nulls_last 4388 4389 if ( 4390 not explicitly_null_ordered 4391 and ( 4392 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4393 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4394 ) 4395 and self.dialect.NULL_ORDERING != "nulls_are_last" 4396 ): 4397 nulls_first = True 4398 4399 if self._match_text_seq("WITH", "FILL"): 4400 with_fill = self.expression( 4401 exp.WithFill, 4402 **{ # type: ignore 4403 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4404 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4405 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4406 "interpolate": self._parse_interpolate(), 4407 }, 4408 ) 4409 else: 4410 with_fill = None 4411 4412 return self.expression( 4413 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4414 ) 4415 4416 def _parse_limit( 4417 self, 4418 this: t.Optional[exp.Expression] = None, 4419 top: bool = False, 4420 skip_limit_token: bool = False, 4421 ) -> t.Optional[exp.Expression]: 4422 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4423 comments = self._prev_comments 4424 if top: 4425 limit_paren = 
self._match(TokenType.L_PAREN) 4426 expression = self._parse_term() if limit_paren else self._parse_number() 4427 4428 if limit_paren: 4429 self._match_r_paren() 4430 else: 4431 expression = self._parse_term() 4432 4433 if self._match(TokenType.COMMA): 4434 offset = expression 4435 expression = self._parse_term() 4436 else: 4437 offset = None 4438 4439 limit_exp = self.expression( 4440 exp.Limit, 4441 this=this, 4442 expression=expression, 4443 offset=offset, 4444 comments=comments, 4445 expressions=self._parse_limit_by(), 4446 ) 4447 4448 return limit_exp 4449 4450 if self._match(TokenType.FETCH): 4451 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4452 direction = self._prev.text.upper() if direction else "FIRST" 4453 4454 count = self._parse_field(tokens=self.FETCH_TOKENS) 4455 percent = self._match(TokenType.PERCENT) 4456 4457 self._match_set((TokenType.ROW, TokenType.ROWS)) 4458 4459 only = self._match_text_seq("ONLY") 4460 with_ties = self._match_text_seq("WITH", "TIES") 4461 4462 if only and with_ties: 4463 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 4464 4465 return self.expression( 4466 exp.Fetch, 4467 direction=direction, 4468 count=count, 4469 percent=percent, 4470 with_ties=with_ties, 4471 ) 4472 4473 return this 4474 4475 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4476 if not self._match(TokenType.OFFSET): 4477 return this 4478 4479 count = self._parse_term() 4480 self._match_set((TokenType.ROW, TokenType.ROWS)) 4481 4482 return self.expression( 4483 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4484 ) 4485 4486 def _can_parse_limit_or_offset(self) -> bool: 4487 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4488 return False 4489 4490 index = self._index 4491 result = bool( 4492 self._try_parse(self._parse_limit, retreat=True) 4493 or self._try_parse(self._parse_offset, retreat=True) 4494 ) 4495 self._retreat(index) 4496 return result 4497 4498 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4499 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4500 4501 def _parse_locks(self) -> t.List[exp.Lock]: 4502 locks = [] 4503 while True: 4504 if self._match_text_seq("FOR", "UPDATE"): 4505 update = True 4506 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4507 "LOCK", "IN", "SHARE", "MODE" 4508 ): 4509 update = False 4510 else: 4511 break 4512 4513 expressions = None 4514 if self._match_text_seq("OF"): 4515 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4516 4517 wait: t.Optional[bool | exp.Expression] = None 4518 if self._match_text_seq("NOWAIT"): 4519 wait = True 4520 elif self._match_text_seq("WAIT"): 4521 wait = self._parse_primary() 4522 elif self._match_text_seq("SKIP", "LOCKED"): 4523 wait = False 4524 4525 locks.append( 4526 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4527 ) 4528 4529 return locks 4530 4531 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4532 while this and self._match_set(self.SET_OPERATIONS): 4533 token_type = self._prev.token_type 4534 4535 if token_type == TokenType.UNION: 4536 operation: t.Type[exp.SetOperation] = exp.Union 4537 elif token_type == TokenType.EXCEPT: 4538 operation = exp.Except 4539 else: 4540 operation = exp.Intersect 4541 4542 comments = self._prev.comments 4543 4544 if self._match(TokenType.DISTINCT): 4545 distinct: 
t.Optional[bool] = True 4546 elif self._match(TokenType.ALL): 4547 distinct = False 4548 else: 4549 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4550 if distinct is None: 4551 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4552 4553 by_name = self._match_text_seq("BY", "NAME") 4554 expression = self._parse_select(nested=True, parse_set_operation=False) 4555 4556 this = self.expression( 4557 operation, 4558 comments=comments, 4559 this=this, 4560 distinct=distinct, 4561 by_name=by_name, 4562 expression=expression, 4563 ) 4564 4565 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4566 expression = this.expression 4567 4568 if expression: 4569 for arg in self.SET_OP_MODIFIERS: 4570 expr = expression.args.get(arg) 4571 if expr: 4572 this.set(arg, expr.pop()) 4573 4574 return this 4575 4576 def _parse_expression(self) -> t.Optional[exp.Expression]: 4577 return self._parse_alias(self._parse_assignment()) 4578 4579 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4580 this = self._parse_disjunction() 4581 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4582 # This allows us to parse <non-identifier token> := <expr> 4583 this = exp.column( 4584 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4585 ) 4586 4587 while self._match_set(self.ASSIGNMENT): 4588 if isinstance(this, exp.Column) and len(this.parts) == 1: 4589 this = this.this 4590 4591 this = self.expression( 4592 self.ASSIGNMENT[self._prev.token_type], 4593 this=this, 4594 comments=self._prev_comments, 4595 expression=self._parse_assignment(), 4596 ) 4597 4598 return this 4599 4600 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4601 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4602 4603 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4604 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4605 4606 def _parse_equality(self) -> t.Optional[exp.Expression]: 4607 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4608 4609 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4610 return self._parse_tokens(self._parse_range, self.COMPARISON) 4611 4612 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4613 this = this or self._parse_bitwise() 4614 negate = self._match(TokenType.NOT) 4615 4616 if self._match_set(self.RANGE_PARSERS): 4617 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4618 if not expression: 4619 return this 4620 4621 this = expression 4622 elif self._match(TokenType.ISNULL): 4623 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4624 4625 # Postgres supports ISNULL and NOTNULL for conditions. 
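# Illustrative aside (not part of the sqlglot source): both spellings are
# normalized to exp.Is (wrapped in exp.Not for NOTNULL), so they transpile to
# the portable IS [NOT] NULL form. A minimal sketch:
#
#     from sqlglot import parse_one
#
#     sql = parse_one("SELECT * FROM t WHERE x NOTNULL", read="postgres").sql()
#     print(sql)  # roughly: SELECT * FROM t WHERE NOT x IS NULL
#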
4626 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4627 if self._match(TokenType.NOTNULL): 4628 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4629 this = self.expression(exp.Not, this=this) 4630 4631 if negate: 4632 this = self._negate_range(this) 4633 4634 if self._match(TokenType.IS): 4635 this = self._parse_is(this) 4636 4637 return this 4638 4639 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4640 if not this: 4641 return this 4642 4643 return self.expression(exp.Not, this=this) 4644 4645 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4646 index = self._index - 1 4647 negate = self._match(TokenType.NOT) 4648 4649 if self._match_text_seq("DISTINCT", "FROM"): 4650 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4651 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4652 4653 if self._match(TokenType.JSON): 4654 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4655 4656 if self._match_text_seq("WITH"): 4657 _with = True 4658 elif self._match_text_seq("WITHOUT"): 4659 _with = False 4660 else: 4661 _with = None 4662 4663 unique = self._match(TokenType.UNIQUE) 4664 self._match_text_seq("KEYS") 4665 expression: t.Optional[exp.Expression] = self.expression( 4666 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4667 ) 4668 else: 4669 expression = self._parse_primary() or self._parse_null() 4670 if not expression: 4671 self._retreat(index) 4672 return None 4673 4674 this = self.expression(exp.Is, this=this, expression=expression) 4675 return self.expression(exp.Not, this=this) if negate else this 4676 4677 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4678 unnest = self._parse_unnest(with_alias=False) 4679 if unnest: 4680 this = self.expression(exp.In, this=this, unnest=unnest) 4681 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4682 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4683 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4684 4685 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4686 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4687 else: 4688 this = self.expression(exp.In, this=this, expressions=expressions) 4689 4690 if matched_l_paren: 4691 self._match_r_paren(this) 4692 elif not self._match(TokenType.R_BRACKET, expression=this): 4693 self.raise_error("Expecting ]") 4694 else: 4695 this = self.expression(exp.In, this=this, field=self._parse_column()) 4696 4697 return this 4698 4699 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4700 low = self._parse_bitwise() 4701 self._match(TokenType.AND) 4702 high = self._parse_bitwise() 4703 return self.expression(exp.Between, this=this, low=low, high=high) 4704 4705 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4706 if not self._match(TokenType.ESCAPE): 4707 return this 4708 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4709 4710 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4711 index = self._index 4712 4713 if not self._match(TokenType.INTERVAL) and match_interval: 4714 return None 4715 4716 if self._match(TokenType.STRING, advance=False): 4717 this = self._parse_primary() 4718 else: 4719 this = self._parse_term() 4720 4721 if not 
this or ( 4722 isinstance(this, exp.Column) 4723 and not this.table 4724 and not this.this.quoted 4725 and this.name.upper() == "IS" 4726 ): 4727 self._retreat(index) 4728 return None 4729 4730 unit = self._parse_function() or ( 4731 not self._match(TokenType.ALIAS, advance=False) 4732 and self._parse_var(any_token=True, upper=True) 4733 ) 4734 4735 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4736 # each INTERVAL expression into this canonical form so it's easy to transpile 4737 if this and this.is_number: 4738 this = exp.Literal.string(this.to_py()) 4739 elif this and this.is_string: 4740 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4741 if parts and unit: 4742 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4743 unit = None 4744 self._retreat(self._index - 1) 4745 4746 if len(parts) == 1: 4747 this = exp.Literal.string(parts[0][0]) 4748 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4749 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4750 unit = self.expression( 4751 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4752 ) 4753 4754 interval = self.expression(exp.Interval, this=this, unit=unit) 4755 4756 index = self._index 4757 self._match(TokenType.PLUS) 4758 4759 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4760 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4761 return self.expression( 4762 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4763 ) 4764 4765 self._retreat(index) 4766 return interval 4767 4768 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4769 this = self._parse_term() 4770 4771 while True: 4772 if self._match_set(self.BITWISE): 4773 this = self.expression( 4774 self.BITWISE[self._prev.token_type], 4775 this=this, 4776 expression=self._parse_term(), 4777 ) 4778 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4779 this = self.expression( 4780 exp.DPipe, 4781 this=this, 4782 expression=self._parse_term(), 4783 safe=not self.dialect.STRICT_STRING_CONCAT, 4784 ) 4785 elif self._match(TokenType.DQMARK): 4786 this = self.expression( 4787 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4788 ) 4789 elif self._match_pair(TokenType.LT, TokenType.LT): 4790 this = self.expression( 4791 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4792 ) 4793 elif self._match_pair(TokenType.GT, TokenType.GT): 4794 this = self.expression( 4795 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4796 ) 4797 else: 4798 break 4799 4800 return this 4801 4802 def _parse_term(self) -> t.Optional[exp.Expression]: 4803 this = self._parse_factor() 4804 4805 while self._match_set(self.TERM): 4806 klass = self.TERM[self._prev.token_type] 4807 comments = self._prev_comments 4808 expression = self._parse_factor() 4809 4810 this = self.expression(klass, this=this, comments=comments, expression=expression) 4811 4812 if isinstance(this, exp.Collate): 4813 expr = this.expression 4814 4815 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 4816 # fallback to Identifier / Var 4817 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4818 ident = expr.this 4819 if isinstance(ident, exp.Identifier): 4820 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4821 4822 return this 4823 4824 def _parse_factor(self) -> t.Optional[exp.Expression]: 4825 parse_method = 
self._parse_exponent if self.EXPONENT else self._parse_unary 4826 this = parse_method() 4827 4828 while self._match_set(self.FACTOR): 4829 klass = self.FACTOR[self._prev.token_type] 4830 comments = self._prev_comments 4831 expression = parse_method() 4832 4833 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4834 self._retreat(self._index - 1) 4835 return this 4836 4837 this = self.expression(klass, this=this, comments=comments, expression=expression) 4838 4839 if isinstance(this, exp.Div): 4840 this.args["typed"] = self.dialect.TYPED_DIVISION 4841 this.args["safe"] = self.dialect.SAFE_DIVISION 4842 4843 return this 4844 4845 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4846 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4847 4848 def _parse_unary(self) -> t.Optional[exp.Expression]: 4849 if self._match_set(self.UNARY_PARSERS): 4850 return self.UNARY_PARSERS[self._prev.token_type](self) 4851 return self._parse_at_time_zone(self._parse_type()) 4852 4853 def _parse_type( 4854 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4855 ) -> t.Optional[exp.Expression]: 4856 interval = parse_interval and self._parse_interval() 4857 if interval: 4858 return interval 4859 4860 index = self._index 4861 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4862 4863 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 4864 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 4865 if isinstance(data_type, exp.Cast): 4866 # This constructor can contain ops directly after it, for instance struct unnesting: 4867 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 4868 return self._parse_column_ops(data_type) 4869 4870 if data_type: 4871 index2 = self._index 4872 this = self._parse_primary() 4873 4874 if isinstance(this, exp.Literal): 4875 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4876 if parser: 4877 return parser(self, this, data_type) 4878 4879 return self.expression(exp.Cast, this=this, to=data_type) 4880 4881 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4882 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4883 # 4884 # If the index difference here is greater than 1, that means the parser itself must have 4885 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4886 # 4887 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4888 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4889 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4890 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 4891 # 4892 # In these cases, we don't really want to return the converted type, but instead retreat 4893 # and try to parse a Column or Identifier in the section below.
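# Illustrative aside (not part of the sqlglot source): the retreat logic below
# is what lets a token sequence like DECIMAL(38, 0) act as a data type in one
# context and as a column or function call in another. A minimal sketch of the
# canonical type the parser settles on:
#
#     from sqlglot import exp
#
#     dtype = exp.DataType.build("DECIMAL(38, 0)")
#     assert dtype.is_type(exp.DataType.Type.DECIMAL)
#     print(dtype.sql())  # DECIMAL(38, 0)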
4894 if data_type.expressions and index2 - index > 1: 4895 self._retreat(index2) 4896 return self._parse_column_ops(data_type) 4897 4898 self._retreat(index) 4899 4900 if fallback_to_identifier: 4901 return self._parse_id_var() 4902 4903 this = self._parse_column() 4904 return this and self._parse_column_ops(this) 4905 4906 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4907 this = self._parse_type() 4908 if not this: 4909 return None 4910 4911 if isinstance(this, exp.Column) and not this.table: 4912 this = exp.var(this.name.upper()) 4913 4914 return self.expression( 4915 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4916 ) 4917 4918 def _parse_types( 4919 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4920 ) -> t.Optional[exp.Expression]: 4921 index = self._index 4922 4923 this: t.Optional[exp.Expression] = None 4924 prefix = self._match_text_seq("SYSUDTLIB", ".") 4925 4926 if not self._match_set(self.TYPE_TOKENS): 4927 identifier = allow_identifiers and self._parse_id_var( 4928 any_token=False, tokens=(TokenType.VAR,) 4929 ) 4930 if isinstance(identifier, exp.Identifier): 4931 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4932 4933 if len(tokens) != 1: 4934 self.raise_error("Unexpected identifier", self._prev) 4935 4936 if tokens[0].token_type in self.TYPE_TOKENS: 4937 self._prev = tokens[0] 4938 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4939 type_name = identifier.name 4940 4941 while self._match(TokenType.DOT): 4942 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4943 4944 this = exp.DataType.build(type_name, udt=True) 4945 else: 4946 self._retreat(self._index - 1) 4947 return None 4948 else: 4949 return None 4950 4951 type_token = self._prev.token_type 4952 4953 if type_token == TokenType.PSEUDO_TYPE: 4954 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4955 4956 if type_token == TokenType.OBJECT_IDENTIFIER: 4957 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4958 4959 # https://materialize.com/docs/sql/types/map/ 4960 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4961 key_type = self._parse_types( 4962 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4963 ) 4964 if not self._match(TokenType.FARROW): 4965 self._retreat(index) 4966 return None 4967 4968 value_type = self._parse_types( 4969 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4970 ) 4971 if not self._match(TokenType.R_BRACKET): 4972 self._retreat(index) 4973 return None 4974 4975 return exp.DataType( 4976 this=exp.DataType.Type.MAP, 4977 expressions=[key_type, value_type], 4978 nested=True, 4979 prefix=prefix, 4980 ) 4981 4982 nested = type_token in self.NESTED_TYPE_TOKENS 4983 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4984 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4985 expressions = None 4986 maybe_func = False 4987 4988 if self._match(TokenType.L_PAREN): 4989 if is_struct: 4990 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4991 elif nested: 4992 expressions = self._parse_csv( 4993 lambda: self._parse_types( 4994 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4995 ) 4996 ) 4997 if type_token == TokenType.NULLABLE and len(expressions) == 1: 4998 this = expressions[0] 4999 this.set("nullable", True) 5000 self._match_r_paren() 5001 return this 5002 elif type_token in self.ENUM_TYPE_TOKENS: 5003 
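# Illustrative aside (not part of the sqlglot source): ENUM bodies pair each
# label with a value, e.g. ClickHouse's Enum8('a' = 1, 'b' = 2), which is why
# the branch below parses comma-separated equalities. A minimal sketch:
#
#     from sqlglot import exp, parse_one
#
#     ddl = "CREATE TABLE t (x Enum8('a' = 1, 'b' = 2))"
#     dtype = parse_one(ddl, read="clickhouse").find(exp.DataType)
#     print(dtype.sql("clickhouse"))  # e.g. Enum8('a' = 1, 'b' = 2)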
expressions = self._parse_csv(self._parse_equality) 5004 elif is_aggregate: 5005 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5006 any_token=False, tokens=(TokenType.VAR,) 5007 ) 5008 if not func_or_ident or not self._match(TokenType.COMMA): 5009 return None 5010 expressions = self._parse_csv( 5011 lambda: self._parse_types( 5012 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5013 ) 5014 ) 5015 expressions.insert(0, func_or_ident) 5016 else: 5017 expressions = self._parse_csv(self._parse_type_size) 5018 5019 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5020 if type_token == TokenType.VECTOR and len(expressions) == 2: 5021 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 5022 5023 if not expressions or not self._match(TokenType.R_PAREN): 5024 self._retreat(index) 5025 return None 5026 5027 maybe_func = True 5028 5029 values: t.Optional[t.List[exp.Expression]] = None 5030 5031 if nested and self._match(TokenType.LT): 5032 if is_struct: 5033 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5034 else: 5035 expressions = self._parse_csv( 5036 lambda: self._parse_types( 5037 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5038 ) 5039 ) 5040 5041 if not self._match(TokenType.GT): 5042 self.raise_error("Expecting >") 5043 5044 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5045 values = self._parse_csv(self._parse_assignment) 5046 if not values and is_struct: 5047 values = None 5048 self._retreat(self._index - 1) 5049 else: 5050 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5051 5052 if type_token in self.TIMESTAMPS: 5053 if self._match_text_seq("WITH", "TIME", "ZONE"): 5054 maybe_func = False 5055 tz_type = ( 5056 exp.DataType.Type.TIMETZ 5057 if type_token in self.TIMES 5058 else exp.DataType.Type.TIMESTAMPTZ 5059 ) 5060 this = exp.DataType(this=tz_type, expressions=expressions) 5061 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5062 maybe_func = False 5063 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5064 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5065 maybe_func = False 5066 elif type_token == TokenType.INTERVAL: 5067 unit = self._parse_var(upper=True) 5068 if unit: 5069 if self._match_text_seq("TO"): 5070 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5071 5072 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5073 else: 5074 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5075 5076 if maybe_func and check_func: 5077 index2 = self._index 5078 peek = self._parse_string() 5079 5080 if not peek: 5081 self._retreat(index) 5082 return None 5083 5084 self._retreat(index2) 5085 5086 if not this: 5087 if self._match_text_seq("UNSIGNED"): 5088 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5089 if not unsigned_type_token: 5090 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5091 5092 type_token = unsigned_type_token or type_token 5093 5094 this = exp.DataType( 5095 this=exp.DataType.Type[type_token.value], 5096 expressions=expressions, 5097 nested=nested, 5098 prefix=prefix, 5099 ) 5100 5101 # Empty arrays/structs are allowed 5102 if values is not None: 5103 cls = exp.Struct if is_struct else exp.Array 5104 this = exp.cast(cls(expressions=values), this, copy=False) 5105 5106 elif expressions: 5107 this.set("expressions", 
expressions) 5108 5109 # https://materialize.com/docs/sql/types/list/#type-name 5110 while self._match(TokenType.LIST): 5111 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5112 5113 index = self._index 5114 5115 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5116 matched_array = self._match(TokenType.ARRAY) 5117 5118 while self._curr: 5119 datatype_token = self._prev.token_type 5120 matched_l_bracket = self._match(TokenType.L_BRACKET) 5121 5122 if (not matched_l_bracket and not matched_array) or ( 5123 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5124 ): 5125 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5126 # not to be confused with the fixed size array parsing 5127 break 5128 5129 matched_array = False 5130 values = self._parse_csv(self._parse_assignment) or None 5131 if ( 5132 values 5133 and not schema 5134 and ( 5135 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5136 ) 5137 ): 5138 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5139 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5140 self._retreat(index) 5141 break 5142 5143 this = exp.DataType( 5144 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5145 ) 5146 self._match(TokenType.R_BRACKET) 5147 5148 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5149 converter = self.TYPE_CONVERTERS.get(this.this) 5150 if converter: 5151 this = converter(t.cast(exp.DataType, this)) 5152 5153 return this 5154 5155 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5156 index = self._index 5157 5158 if ( 5159 self._curr 5160 and self._next 5161 and self._curr.token_type in self.TYPE_TOKENS 5162 and self._next.token_type in self.TYPE_TOKENS 5163 ): 5164 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5165 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5166 this = self._parse_id_var() 5167 else: 5168 this = ( 5169 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5170 or self._parse_id_var() 5171 ) 5172 5173 self._match(TokenType.COLON) 5174 5175 if ( 5176 type_required 5177 and not isinstance(this, exp.DataType) 5178 and not self._match_set(self.TYPE_TOKENS, advance=False) 5179 ): 5180 self._retreat(index) 5181 return self._parse_types() 5182 5183 return self._parse_column_def(this) 5184 5185 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5186 if not self._match_text_seq("AT", "TIME", "ZONE"): 5187 return this 5188 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5189 5190 def _parse_column(self) -> t.Optional[exp.Expression]: 5191 this = self._parse_column_reference() 5192 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5193 5194 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5195 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5196 5197 return column 5198 5199 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5200 this = self._parse_field() 5201 if ( 5202 not this 5203 and self._match(TokenType.VALUES, advance=False) 5204 and self.VALUES_FOLLOWED_BY_PAREN 5205 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5206 ): 5207 this = self._parse_id_var() 5208 5209 if isinstance(this, exp.Identifier): 5210 # We bubble up comments from the Identifier to the Column 5211 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5212 5213 return this 5214 5215 def _parse_colon_as_variant_extract( 5216 self, this: t.Optional[exp.Expression] 5217 ) -> t.Optional[exp.Expression]: 5218 casts = [] 5219 json_path = [] 5220 escape = None 5221 5222 while self._match(TokenType.COLON): 5223 start_index = self._index 5224 5225 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5226 path = self._parse_column_ops( 5227 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5228 ) 5229 5230 # The cast :: operator has a lower precedence than the extraction operator :, so 5231 # we rearrange the AST appropriately to avoid casting the JSON path 5232 while isinstance(path, exp.Cast): 5233 casts.append(path.to) 5234 path = path.this 5235 5236 if casts: 5237 dcolon_offset = next( 5238 i 5239 for i, t in enumerate(self._tokens[start_index:]) 5240 if t.token_type == TokenType.DCOLON 5241 ) 5242 end_token = self._tokens[start_index + dcolon_offset - 1] 5243 else: 5244 end_token = self._prev 5245 5246 if path: 5247 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5248 # it'll roundtrip to a string literal in GET_PATH 5249 if isinstance(path, exp.Identifier) and path.quoted: 5250 escape = True 5251 5252 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5253 5254 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5255 # Databricks transforms it back to the colon/dot notation 5256 if json_path: 5257 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5258 5259 if json_path_expr: 5260 json_path_expr.set("escape", escape) 5261 5262 this = self.expression( 5263 exp.JSONExtract, 5264 this=this, 5265 expression=json_path_expr, 5266 variant_extract=True, 5267 ) 5268 5269 while casts: 5270 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5271 5272 return this 5273 5274 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5275 return self._parse_types() 5276 5277 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5278 this = self._parse_bracket(this) 5279 5280 while self._match_set(self.COLUMN_OPERATORS): 5281 op_token = self._prev.token_type 5282 op = self.COLUMN_OPERATORS.get(op_token) 5283 5284 if op_token == TokenType.DCOLON: 5285 field = self._parse_dcolon() 5286 if not field: 5287 self.raise_error("Expected type") 5288 elif op and self._curr: 5289 field = self._parse_column_reference() or self._parse_bracket() 5290 else: 5291 field = self._parse_field(any_token=True, anonymous_func=True) 5292 5293 if isinstance(field, (exp.Func, exp.Window)) and this: 5294 # BQ & snowflake allow function calls like x.y.count(...), SAFE.SUBSTR(...) etc 5295 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5296 this = exp.replace_tree( 5297 this, 5298 lambda n: ( 5299 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5300 if n.table 5301 else n.this 5302 ) 5303 if isinstance(n, exp.Column) 5304 else n, 5305 ) 5306 5307 if op: 5308 this = op(self, this, field) 5309 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5310 this = self.expression( 5311 exp.Column, 5312 comments=this.comments, 5313 this=field, 5314 table=this.this, 5315 db=this.args.get("table"), 5316 catalog=this.args.get("db"), 5317 ) 5318 elif isinstance(field, exp.Window): 5319 # Move the exp.Dot's to the window's function 5320 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5321 field.set("this", window_func) 5322 this = field 5323 else: 5324 this = self.expression(exp.Dot, this=this, expression=field) 5325 5326 if field and field.comments: 5327 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5328 5329 this = self._parse_bracket(this) 5330 5331 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5332 5333 def _parse_primary(self) -> t.Optional[exp.Expression]: 5334 if self._match_set(self.PRIMARY_PARSERS): 5335 token_type = self._prev.token_type 5336 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5337 5338 if token_type == TokenType.STRING: 5339 expressions = [primary] 5340 while self._match(TokenType.STRING): 5341 expressions.append(exp.Literal.string(self._prev.text)) 5342 5343 if len(expressions) > 1: 5344 return self.expression(exp.Concat, expressions=expressions) 5345 5346 return primary 5347 5348 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5349 return exp.Literal.number(f"0.{self._prev.text}") 5350 5351 if 
self._match(TokenType.L_PAREN): 5352 comments = self._prev_comments 5353 query = self._parse_select() 5354 5355 if query: 5356 expressions = [query] 5357 else: 5358 expressions = self._parse_expressions() 5359 5360 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5361 5362 if not this and self._match(TokenType.R_PAREN, advance=False): 5363 this = self.expression(exp.Tuple) 5364 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5365 this = self._parse_subquery(this=this, parse_alias=False) 5366 elif isinstance(this, exp.Subquery): 5367 this = self._parse_subquery( 5368 this=self._parse_set_operations(this), parse_alias=False 5369 ) 5370 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5371 this = self.expression(exp.Tuple, expressions=expressions) 5372 else: 5373 this = self.expression(exp.Paren, this=this) 5374 5375 if this: 5376 this.add_comments(comments) 5377 5378 self._match_r_paren(expression=this) 5379 return this 5380 5381 return None 5382 5383 def _parse_field( 5384 self, 5385 any_token: bool = False, 5386 tokens: t.Optional[t.Collection[TokenType]] = None, 5387 anonymous_func: bool = False, 5388 ) -> t.Optional[exp.Expression]: 5389 if anonymous_func: 5390 field = ( 5391 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5392 or self._parse_primary() 5393 ) 5394 else: 5395 field = self._parse_primary() or self._parse_function( 5396 anonymous=anonymous_func, any_token=any_token 5397 ) 5398 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5399 5400 def _parse_function( 5401 self, 5402 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5403 anonymous: bool = False, 5404 optional_parens: bool = True, 5405 any_token: bool = False, 5406 ) -> t.Optional[exp.Expression]: 5407 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5408 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5409 fn_syntax = False 5410 if ( 5411 self._match(TokenType.L_BRACE, advance=False) 5412 and self._next 5413 and self._next.text.upper() == "FN" 5414 ): 5415 self._advance(2) 5416 fn_syntax = True 5417 5418 func = self._parse_function_call( 5419 functions=functions, 5420 anonymous=anonymous, 5421 optional_parens=optional_parens, 5422 any_token=any_token, 5423 ) 5424 5425 if fn_syntax: 5426 self._match(TokenType.R_BRACE) 5427 5428 return func 5429 5430 def _parse_function_call( 5431 self, 5432 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5433 anonymous: bool = False, 5434 optional_parens: bool = True, 5435 any_token: bool = False, 5436 ) -> t.Optional[exp.Expression]: 5437 if not self._curr: 5438 return None 5439 5440 comments = self._curr.comments 5441 token_type = self._curr.token_type 5442 this = self._curr.text 5443 upper = this.upper() 5444 5445 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5446 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5447 self._advance() 5448 return self._parse_window(parser(self)) 5449 5450 if not self._next or self._next.token_type != TokenType.L_PAREN: 5451 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5452 self._advance() 5453 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5454 5455 return None 5456 5457 if any_token: 5458 if token_type in self.RESERVED_TOKENS: 5459 return None 5460 elif token_type not in self.FUNC_TOKENS: 5461 return None 5462 5463 self._advance(2) 5464 5465 parser = self.FUNCTION_PARSERS.get(upper) 5466 if parser and not anonymous: 5467 this = parser(self) 
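        # A short sketch (sqlglot import assumed; output approximate) of the
        # ODBC {fn ...} escape consumed above: the braces are matched and
        # dropped, and the wrapped call is parsed like any other function.
        #
        #   >>> import sqlglot
        #   >>> sqlglot.parse_one("SELECT {fn CONCAT('a', 'b')}", read="mysql").sql(dialect="mysql")
        #   "SELECT CONCAT('a', 'b')"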
5468 else: 5469 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5470 5471 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5472 this = self.expression( 5473 subquery_predicate, comments=comments, this=self._parse_select() 5474 ) 5475 self._match_r_paren() 5476 return this 5477 5478 if functions is None: 5479 functions = self.FUNCTIONS 5480 5481 function = functions.get(upper) 5482 known_function = function and not anonymous 5483 5484 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5485 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5486 5487 post_func_comments = self._curr and self._curr.comments 5488 if known_function and post_func_comments: 5489 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5490 # call we'll construct it as exp.Anonymous, even if it's "known" 5491 if any( 5492 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5493 for comment in post_func_comments 5494 ): 5495 known_function = False 5496 5497 if alias and known_function: 5498 args = self._kv_to_prop_eq(args) 5499 5500 if known_function: 5501 func_builder = t.cast(t.Callable, function) 5502 5503 if "dialect" in func_builder.__code__.co_varnames: 5504 func = func_builder(args, dialect=self.dialect) 5505 else: 5506 func = func_builder(args) 5507 5508 func = self.validate_expression(func, args) 5509 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5510 func.meta["name"] = this 5511 5512 this = func 5513 else: 5514 if token_type == TokenType.IDENTIFIER: 5515 this = exp.Identifier(this=this, quoted=True) 5516 this = self.expression(exp.Anonymous, this=this, expressions=args) 5517 5518 if isinstance(this, exp.Expression): 5519 this.add_comments(comments) 5520 5521 self._match_r_paren(this) 5522 return self._parse_window(this) 5523 5524 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5525 return expression 5526 5527 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5528 transformed = [] 5529 5530 for index, e in enumerate(expressions): 5531 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5532 if isinstance(e, exp.Alias): 5533 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5534 5535 if not isinstance(e, exp.PropertyEQ): 5536 e = self.expression( 5537 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5538 ) 5539 5540 if isinstance(e.this, exp.Column): 5541 e.this.replace(e.this.this) 5542 else: 5543 e = self._to_prop_eq(e, index) 5544 5545 transformed.append(e) 5546 5547 return transformed 5548 5549 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5550 return self._parse_statement() 5551 5552 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5553 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5554 5555 def _parse_user_defined_function( 5556 self, kind: t.Optional[TokenType] = None 5557 ) -> t.Optional[exp.Expression]: 5558 this = self._parse_id_var() 5559 5560 while self._match(TokenType.DOT): 5561 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 5562 5563 if not self._match(TokenType.L_PAREN): 5564 return this 5565 5566 expressions = self._parse_csv(self._parse_function_parameter) 5567 self._match_r_paren() 5568 return self.expression( 5569 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5570 ) 5571 5572 def _parse_introducer(self, token: Token) -> 
exp.Introducer | exp.Identifier: 5573 literal = self._parse_primary() 5574 if literal: 5575 return self.expression(exp.Introducer, this=token.text, expression=literal) 5576 5577 return self.expression(exp.Identifier, this=token.text) 5578 5579 def _parse_session_parameter(self) -> exp.SessionParameter: 5580 kind = None 5581 this = self._parse_id_var() or self._parse_primary() 5582 5583 if this and self._match(TokenType.DOT): 5584 kind = this.name 5585 this = self._parse_var() or self._parse_primary() 5586 5587 return self.expression(exp.SessionParameter, this=this, kind=kind) 5588 5589 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5590 return self._parse_id_var() 5591 5592 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5593 index = self._index 5594 5595 if self._match(TokenType.L_PAREN): 5596 expressions = t.cast( 5597 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5598 ) 5599 5600 if not self._match(TokenType.R_PAREN): 5601 self._retreat(index) 5602 else: 5603 expressions = [self._parse_lambda_arg()] 5604 5605 if self._match_set(self.LAMBDAS): 5606 return self.LAMBDAS[self._prev.token_type](self, expressions) 5607 5608 self._retreat(index) 5609 5610 this: t.Optional[exp.Expression] 5611 5612 if self._match(TokenType.DISTINCT): 5613 this = self.expression( 5614 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5615 ) 5616 else: 5617 this = self._parse_select_or_expression(alias=alias) 5618 5619 return self._parse_limit( 5620 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5621 ) 5622 5623 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5624 index = self._index 5625 if not self._match(TokenType.L_PAREN): 5626 return this 5627 5628 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 5629 # expr can be of both types 5630 if self._match_set(self.SELECT_START_TOKENS): 5631 self._retreat(index) 5632 return this 5633 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5634 self._match_r_paren() 5635 return self.expression(exp.Schema, this=this, expressions=args) 5636 5637 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5638 return self._parse_column_def(self._parse_field(any_token=True)) 5639 5640 def _parse_column_def( 5641 self, this: t.Optional[exp.Expression], computed_column: bool = True 5642 ) -> t.Optional[exp.Expression]: 5643 # column defs are not really columns, they're identifiers 5644 if isinstance(this, exp.Column): 5645 this = this.this 5646 5647 if not computed_column: 5648 self._match(TokenType.ALIAS) 5649 5650 kind = self._parse_types(schema=True) 5651 5652 if self._match_text_seq("FOR", "ORDINALITY"): 5653 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5654 5655 constraints: t.List[exp.Expression] = [] 5656 5657 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5658 ("ALIAS", "MATERIALIZED") 5659 ): 5660 persisted = self._prev.text.upper() == "MATERIALIZED" 5661 constraint_kind = exp.ComputedColumnConstraint( 5662 this=self._parse_assignment(), 5663 persisted=persisted or self._match_text_seq("PERSISTED"), 5664 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5665 ) 5666 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5667 elif ( 5668 kind 5669 and self._match(TokenType.ALIAS, advance=False) 5670 and ( 5671 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 5672 or (self._next and self._next.token_type == TokenType.L_PAREN) 5673 ) 5674 ): 5675 self._advance() 5676 constraints.append( 5677 self.expression( 5678 exp.ColumnConstraint, 5679 kind=exp.TransformColumnConstraint(this=self._parse_disjunction()), 5680 ) 5681 ) 5682 5683 while True: 5684 constraint = self._parse_column_constraint() 5685 if not constraint: 5686 break 5687 constraints.append(constraint) 5688 5689 if not kind and not constraints: 5690 return this 5691 5692 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5693 5694 def _parse_auto_increment( 5695 self, 5696 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5697 start = None 5698 increment = None 5699 5700 if self._match(TokenType.L_PAREN, advance=False): 5701 args = self._parse_wrapped_csv(self._parse_bitwise) 5702 start = seq_get(args, 0) 5703 increment = seq_get(args, 1) 5704 elif self._match_text_seq("START"): 5705 start = self._parse_bitwise() 5706 self._match_text_seq("INCREMENT") 5707 increment = self._parse_bitwise() 5708 5709 if start and increment: 5710 return exp.GeneratedAsIdentityColumnConstraint( 5711 start=start, increment=increment, this=False 5712 ) 5713 5714 return exp.AutoIncrementColumnConstraint() 5715 5716 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5717 if not self._match_text_seq("REFRESH"): 5718 self._retreat(self._index - 1) 5719 return None 5720 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5721 5722 def _parse_compress(self) -> exp.CompressColumnConstraint: 5723 if self._match(TokenType.L_PAREN, advance=False): 5724 return self.expression( 5725 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5726 ) 5727 5728 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5729 5730 def 
_parse_generated_as_identity( 5731 self, 5732 ) -> ( 5733 exp.GeneratedAsIdentityColumnConstraint 5734 | exp.ComputedColumnConstraint 5735 | exp.GeneratedAsRowColumnConstraint 5736 ): 5737 if self._match_text_seq("BY", "DEFAULT"): 5738 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5739 this = self.expression( 5740 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5741 ) 5742 else: 5743 self._match_text_seq("ALWAYS") 5744 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5745 5746 self._match(TokenType.ALIAS) 5747 5748 if self._match_text_seq("ROW"): 5749 start = self._match_text_seq("START") 5750 if not start: 5751 self._match(TokenType.END) 5752 hidden = self._match_text_seq("HIDDEN") 5753 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5754 5755 identity = self._match_text_seq("IDENTITY") 5756 5757 if self._match(TokenType.L_PAREN): 5758 if self._match(TokenType.START_WITH): 5759 this.set("start", self._parse_bitwise()) 5760 if self._match_text_seq("INCREMENT", "BY"): 5761 this.set("increment", self._parse_bitwise()) 5762 if self._match_text_seq("MINVALUE"): 5763 this.set("minvalue", self._parse_bitwise()) 5764 if self._match_text_seq("MAXVALUE"): 5765 this.set("maxvalue", self._parse_bitwise()) 5766 5767 if self._match_text_seq("CYCLE"): 5768 this.set("cycle", True) 5769 elif self._match_text_seq("NO", "CYCLE"): 5770 this.set("cycle", False) 5771 5772 if not identity: 5773 this.set("expression", self._parse_range()) 5774 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5775 args = self._parse_csv(self._parse_bitwise) 5776 this.set("start", seq_get(args, 0)) 5777 this.set("increment", seq_get(args, 1)) 5778 5779 self._match_r_paren() 5780 5781 return this 5782 5783 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5784 self._match_text_seq("LENGTH") 5785 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5786 5787 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5788 if self._match_text_seq("NULL"): 5789 return self.expression(exp.NotNullColumnConstraint) 5790 if self._match_text_seq("CASESPECIFIC"): 5791 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5792 if self._match_text_seq("FOR", "REPLICATION"): 5793 return self.expression(exp.NotForReplicationColumnConstraint) 5794 5795 # Unconsume the `NOT` token 5796 self._retreat(self._index - 1) 5797 return None 5798 5799 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5800 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 5801 5802 procedure_option_follows = ( 5803 self._match(TokenType.WITH, advance=False) 5804 and self._next 5805 and self._next.text.upper() in self.PROCEDURE_OPTIONS 5806 ) 5807 5808 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 5809 return self.expression( 5810 exp.ColumnConstraint, 5811 this=this, 5812 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5813 ) 5814 5815 return this 5816 5817 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5818 if not self._match(TokenType.CONSTRAINT): 5819 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5820 5821 return self.expression( 5822 exp.Constraint, 5823 this=self._parse_id_var(), 5824 expressions=self._parse_unnamed_constraints(), 5825 ) 5826 5827 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5828 constraints = [] 5829 while True: 5830 
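        # A minimal sketch (assumes sqlglot; the table/column names are made
        # up and the output is approximate) of the GENERATED ... AS IDENTITY
        # handling in _parse_generated_as_identity above, which should
        # round-trip modulo formatting:
        #
        #   >>> import sqlglot
        #   >>> sql = "CREATE TABLE t (id INT GENERATED ALWAYS AS IDENTITY (START WITH 1 INCREMENT BY 2))"
        #   >>> sqlglot.parse_one(sql, read="postgres").sql(dialect="postgres")
        #   'CREATE TABLE t (id INT GENERATED ALWAYS AS IDENTITY (START WITH 1 INCREMENT BY 2))'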
constraint = self._parse_unnamed_constraint() or self._parse_function() 5831 if not constraint: 5832 break 5833 constraints.append(constraint) 5834 5835 return constraints 5836 5837 def _parse_unnamed_constraint( 5838 self, constraints: t.Optional[t.Collection[str]] = None 5839 ) -> t.Optional[exp.Expression]: 5840 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5841 constraints or self.CONSTRAINT_PARSERS 5842 ): 5843 return None 5844 5845 constraint = self._prev.text.upper() 5846 if constraint not in self.CONSTRAINT_PARSERS: 5847 self.raise_error(f"No parser found for schema constraint {constraint}.") 5848 5849 return self.CONSTRAINT_PARSERS[constraint](self) 5850 5851 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5852 return self._parse_id_var(any_token=False) 5853 5854 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5855 self._match_text_seq("KEY") 5856 return self.expression( 5857 exp.UniqueColumnConstraint, 5858 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5859 this=self._parse_schema(self._parse_unique_key()), 5860 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5861 on_conflict=self._parse_on_conflict(), 5862 ) 5863 5864 def _parse_key_constraint_options(self) -> t.List[str]: 5865 options = [] 5866 while True: 5867 if not self._curr: 5868 break 5869 5870 if self._match(TokenType.ON): 5871 action = None 5872 on = self._advance_any() and self._prev.text 5873 5874 if self._match_text_seq("NO", "ACTION"): 5875 action = "NO ACTION" 5876 elif self._match_text_seq("CASCADE"): 5877 action = "CASCADE" 5878 elif self._match_text_seq("RESTRICT"): 5879 action = "RESTRICT" 5880 elif self._match_pair(TokenType.SET, TokenType.NULL): 5881 action = "SET NULL" 5882 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5883 action = "SET DEFAULT" 5884 else: 5885 self.raise_error("Invalid key constraint") 5886 5887 options.append(f"ON {on} {action}") 5888 else: 5889 var = self._parse_var_from_options( 5890 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5891 ) 5892 if not var: 5893 break 5894 options.append(var.name) 5895 5896 return options 5897 5898 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5899 if match and not self._match(TokenType.REFERENCES): 5900 return None 5901 5902 expressions = None 5903 this = self._parse_table(schema=True) 5904 options = self._parse_key_constraint_options() 5905 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5906 5907 def _parse_foreign_key(self) -> exp.ForeignKey: 5908 expressions = self._parse_wrapped_id_vars() 5909 reference = self._parse_references() 5910 options = {} 5911 5912 while self._match(TokenType.ON): 5913 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5914 self.raise_error("Expected DELETE or UPDATE") 5915 5916 kind = self._prev.text.lower() 5917 5918 if self._match_text_seq("NO", "ACTION"): 5919 action = "NO ACTION" 5920 elif self._match(TokenType.SET): 5921 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5922 action = "SET " + self._prev.text.upper() 5923 else: 5924 self._advance() 5925 action = self._prev.text.upper() 5926 5927 options[kind] = action 5928 5929 return self.expression( 5930 exp.ForeignKey, 5931 expressions=expressions, 5932 reference=reference, 5933 **options, # type: ignore 5934 ) 5935 5936 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5937 return self._parse_ordered() or self._parse_field() 5938 5939 def 
_parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5940 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5941 self._retreat(self._index - 1) 5942 return None 5943 5944 id_vars = self._parse_wrapped_id_vars() 5945 return self.expression( 5946 exp.PeriodForSystemTimeConstraint, 5947 this=seq_get(id_vars, 0), 5948 expression=seq_get(id_vars, 1), 5949 ) 5950 5951 def _parse_primary_key( 5952 self, wrapped_optional: bool = False, in_props: bool = False 5953 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5954 desc = ( 5955 self._match_set((TokenType.ASC, TokenType.DESC)) 5956 and self._prev.token_type == TokenType.DESC 5957 ) 5958 5959 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5960 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5961 5962 expressions = self._parse_wrapped_csv( 5963 self._parse_primary_key_part, optional=wrapped_optional 5964 ) 5965 options = self._parse_key_constraint_options() 5966 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5967 5968 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5969 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5970 5971 def _parse_odbc_datetime_literal(self) -> exp.Expression: 5972 """ 5973 Parses a datetime column in ODBC format. We parse the column into the corresponding 5974 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 5975 same as we did for `DATE('yyyy-mm-dd')`. 5976 5977 Reference: 5978 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 5979 """ 5980 self._match(TokenType.VAR) 5981 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 5982 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 5983 if not self._match(TokenType.R_BRACE): 5984 self.raise_error("Expected }") 5985 return expression 5986 5987 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5988 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5989 return this 5990 5991 bracket_kind = self._prev.token_type 5992 if ( 5993 bracket_kind == TokenType.L_BRACE 5994 and self._curr 5995 and self._curr.token_type == TokenType.VAR 5996 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 5997 ): 5998 return self._parse_odbc_datetime_literal() 5999 6000 expressions = self._parse_csv( 6001 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6002 ) 6003 6004 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6005 self.raise_error("Expected ]") 6006 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6007 self.raise_error("Expected }") 6008 6009 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6010 if bracket_kind == TokenType.L_BRACE: 6011 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 6012 elif not this: 6013 this = build_array_constructor( 6014 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6015 ) 6016 else: 6017 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6018 if constructor_type: 6019 return build_array_constructor( 6020 constructor_type, 6021 args=expressions, 6022 bracket_kind=bracket_kind, 6023 dialect=self.dialect, 6024 ) 6025 6026 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 6027 
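        # An illustrative sketch (assumes sqlglot is importable) of the
        # L_BRACE branch above: DuckDB's {'a': 1} struct literal is parsed
        # into exp.Struct, with its key-value pairs normalized through
        # _kv_to_prop_eq.
        #
        #   >>> import sqlglot
        #   >>> sqlglot.parse_one("SELECT {'a': 1}", read="duckdb").selects[0].sql(dialect="duckdb")
        #   "{'a': 1}"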
this = self.expression(exp.Bracket, this=this, expressions=expressions) 6028 6029 self._add_comments(this) 6030 return self._parse_bracket(this) 6031 6032 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6033 if self._match(TokenType.COLON): 6034 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6035 return this 6036 6037 def _parse_case(self) -> t.Optional[exp.Expression]: 6038 ifs = [] 6039 default = None 6040 6041 comments = self._prev_comments 6042 expression = self._parse_assignment() 6043 6044 while self._match(TokenType.WHEN): 6045 this = self._parse_assignment() 6046 self._match(TokenType.THEN) 6047 then = self._parse_assignment() 6048 ifs.append(self.expression(exp.If, this=this, true=then)) 6049 6050 if self._match(TokenType.ELSE): 6051 default = self._parse_assignment() 6052 6053 if not self._match(TokenType.END): 6054 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6055 default = exp.column("interval") 6056 else: 6057 self.raise_error("Expected END after CASE", self._prev) 6058 6059 return self.expression( 6060 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6061 ) 6062 6063 def _parse_if(self) -> t.Optional[exp.Expression]: 6064 if self._match(TokenType.L_PAREN): 6065 args = self._parse_csv(self._parse_assignment) 6066 this = self.validate_expression(exp.If.from_arg_list(args), args) 6067 self._match_r_paren() 6068 else: 6069 index = self._index - 1 6070 6071 if self.NO_PAREN_IF_COMMANDS and index == 0: 6072 return self._parse_as_command(self._prev) 6073 6074 condition = self._parse_assignment() 6075 6076 if not condition: 6077 self._retreat(index) 6078 return None 6079 6080 self._match(TokenType.THEN) 6081 true = self._parse_assignment() 6082 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6083 self._match(TokenType.END) 6084 this = self.expression(exp.If, this=condition, true=true, false=false) 6085 6086 return this 6087 6088 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6089 if not self._match_text_seq("VALUE", "FOR"): 6090 self._retreat(self._index - 1) 6091 return None 6092 6093 return self.expression( 6094 exp.NextValueFor, 6095 this=self._parse_column(), 6096 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6097 ) 6098 6099 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6100 this = self._parse_function() or self._parse_var_or_string(upper=True) 6101 6102 if self._match(TokenType.FROM): 6103 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6104 6105 if not self._match(TokenType.COMMA): 6106 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6107 6108 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6109 6110 def _parse_gap_fill(self) -> exp.GapFill: 6111 self._match(TokenType.TABLE) 6112 this = self._parse_table() 6113 6114 self._match(TokenType.COMMA) 6115 args = [this, *self._parse_csv(self._parse_lambda)] 6116 6117 gap_fill = exp.GapFill.from_arg_list(args) 6118 return self.validate_expression(gap_fill, args) 6119 6120 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6121 this = self._parse_assignment() 6122 6123 if not self._match(TokenType.ALIAS): 6124 if self._match(TokenType.COMMA): 6125 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6126 6127 self.raise_error("Expected AS after CAST") 6128 6129 fmt = None 6130 to 
= self._parse_types() 6131 6132 default = self._match(TokenType.DEFAULT) 6133 if default: 6134 default = self._parse_bitwise() 6135 self._match_text_seq("ON", "CONVERSION", "ERROR") 6136 6137 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6138 fmt_string = self._parse_string() 6139 fmt = self._parse_at_time_zone(fmt_string) 6140 6141 if not to: 6142 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6143 if to.this in exp.DataType.TEMPORAL_TYPES: 6144 this = self.expression( 6145 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6146 this=this, 6147 format=exp.Literal.string( 6148 format_time( 6149 fmt_string.this if fmt_string else "", 6150 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6151 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6152 ) 6153 ), 6154 safe=safe, 6155 ) 6156 6157 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6158 this.set("zone", fmt.args["zone"]) 6159 return this 6160 elif not to: 6161 self.raise_error("Expected TYPE after CAST") 6162 elif isinstance(to, exp.Identifier): 6163 to = exp.DataType.build(to.name, udt=True) 6164 elif to.this == exp.DataType.Type.CHAR: 6165 if self._match(TokenType.CHARACTER_SET): 6166 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6167 6168 return self.expression( 6169 exp.Cast if strict else exp.TryCast, 6170 this=this, 6171 to=to, 6172 format=fmt, 6173 safe=safe, 6174 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6175 default=default, 6176 ) 6177 6178 def _parse_string_agg(self) -> exp.GroupConcat: 6179 if self._match(TokenType.DISTINCT): 6180 args: t.List[t.Optional[exp.Expression]] = [ 6181 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6182 ] 6183 if self._match(TokenType.COMMA): 6184 args.extend(self._parse_csv(self._parse_assignment)) 6185 else: 6186 args = self._parse_csv(self._parse_assignment) # type: ignore 6187 6188 if self._match_text_seq("ON", "OVERFLOW"): 6189 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6190 if self._match_text_seq("ERROR"): 6191 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6192 else: 6193 self._match_text_seq("TRUNCATE") 6194 on_overflow = self.expression( 6195 exp.OverflowTruncateBehavior, 6196 this=self._parse_string(), 6197 with_count=( 6198 self._match_text_seq("WITH", "COUNT") 6199 or not self._match_text_seq("WITHOUT", "COUNT") 6200 ), 6201 ) 6202 else: 6203 on_overflow = None 6204 6205 index = self._index 6206 if not self._match(TokenType.R_PAREN) and args: 6207 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6208 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6209 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 6210 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6211 6212 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6213 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6214 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
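        # A quick sketch (assumes sqlglot; the target SQL shown is
        # approximate) of why the WITHIN GROUP check below is handled
        # manually: folding the ordering into the GroupConcat node keeps
        # LISTAGG transpilable to dialects that spell it STRING_AGG or
        # GROUP_CONCAT.
        #
        #   >>> import sqlglot
        #   >>> sqlglot.transpile(
        #   ...     "SELECT LISTAGG(x, ',') WITHIN GROUP (ORDER BY x) FROM t",
        #   ...     read="oracle",
        #   ...     write="postgres",
        #   ... )[0]
        #   "SELECT STRING_AGG(x, ',' ORDER BY x) FROM t"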
6215 if not self._match_text_seq("WITHIN", "GROUP"): 6216 self._retreat(index) 6217 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6218 6219 # The corresponding match_r_paren will be called in parse_function (caller) 6220 self._match_l_paren() 6221 6222 return self.expression( 6223 exp.GroupConcat, 6224 this=self._parse_order(this=seq_get(args, 0)), 6225 separator=seq_get(args, 1), 6226 on_overflow=on_overflow, 6227 ) 6228 6229 def _parse_convert( 6230 self, strict: bool, safe: t.Optional[bool] = None 6231 ) -> t.Optional[exp.Expression]: 6232 this = self._parse_bitwise() 6233 6234 if self._match(TokenType.USING): 6235 to: t.Optional[exp.Expression] = self.expression( 6236 exp.CharacterSet, this=self._parse_var() 6237 ) 6238 elif self._match(TokenType.COMMA): 6239 to = self._parse_types() 6240 else: 6241 to = None 6242 6243 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 6244 6245 def _parse_xml_table(self) -> exp.XMLTable: 6246 namespaces = None 6247 passing = None 6248 columns = None 6249 6250 if self._match_text_seq("XMLNAMESPACES", "("): 6251 namespaces = self._parse_xml_namespace() 6252 self._match_text_seq(")", ",") 6253 6254 this = self._parse_string() 6255 6256 if self._match_text_seq("PASSING"): 6257 # The BY VALUE keywords are optional and are provided for semantic clarity 6258 self._match_text_seq("BY", "VALUE") 6259 passing = self._parse_csv(self._parse_column) 6260 6261 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6262 6263 if self._match_text_seq("COLUMNS"): 6264 columns = self._parse_csv(self._parse_field_def) 6265 6266 return self.expression( 6267 exp.XMLTable, 6268 this=this, 6269 namespaces=namespaces, 6270 passing=passing, 6271 columns=columns, 6272 by_ref=by_ref, 6273 ) 6274 6275 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6276 namespaces = [] 6277 6278 while True: 6279 if self._match(TokenType.DEFAULT): 6280 uri = self._parse_string() 6281 else: 6282 uri = self._parse_alias(self._parse_string()) 6283 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6284 if not self._match(TokenType.COMMA): 6285 break 6286 6287 return namespaces 6288 6289 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 6290 """ 6291 There are generally two variants of the DECODE function: 6292 6293 - DECODE(bin, charset) 6294 - DECODE(expression, search, result [, search, result] ... [, default]) 6295 6296 The second variant will always be parsed into a CASE expression. Note that NULL 6297 needs special treatment, since we need to explicitly check for it with `IS NULL`, 6298 instead of relying on pattern matching. 
6299 """ 6300 args = self._parse_csv(self._parse_assignment) 6301 6302 if len(args) < 3: 6303 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6304 6305 expression, *expressions = args 6306 if not expression: 6307 return None 6308 6309 ifs = [] 6310 for search, result in zip(expressions[::2], expressions[1::2]): 6311 if not search or not result: 6312 return None 6313 6314 if isinstance(search, exp.Literal): 6315 ifs.append( 6316 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 6317 ) 6318 elif isinstance(search, exp.Null): 6319 ifs.append( 6320 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 6321 ) 6322 else: 6323 cond = exp.or_( 6324 exp.EQ(this=expression.copy(), expression=search), 6325 exp.and_( 6326 exp.Is(this=expression.copy(), expression=exp.Null()), 6327 exp.Is(this=search.copy(), expression=exp.Null()), 6328 copy=False, 6329 ), 6330 copy=False, 6331 ) 6332 ifs.append(exp.If(this=cond, true=result)) 6333 6334 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6335 6336 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6337 self._match_text_seq("KEY") 6338 key = self._parse_column() 6339 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6340 self._match_text_seq("VALUE") 6341 value = self._parse_bitwise() 6342 6343 if not key and not value: 6344 return None 6345 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6346 6347 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6348 if not this or not self._match_text_seq("FORMAT", "JSON"): 6349 return this 6350 6351 return self.expression(exp.FormatJson, this=this) 6352 6353 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6354 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6355 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6356 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6357 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6358 else: 6359 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6360 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6361 6362 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6363 6364 if not empty and not error and not null: 6365 return None 6366 6367 return self.expression( 6368 exp.OnCondition, 6369 empty=empty, 6370 error=error, 6371 null=null, 6372 ) 6373 6374 def _parse_on_handling( 6375 self, on: str, *values: str 6376 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6377 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6378 for value in values: 6379 if self._match_text_seq(value, "ON", on): 6380 return f"{value} ON {on}" 6381 6382 index = self._index 6383 if self._match(TokenType.DEFAULT): 6384 default_value = self._parse_bitwise() 6385 if self._match_text_seq("ON", on): 6386 return default_value 6387 6388 self._retreat(index) 6389 6390 return None 6391 6392 @t.overload 6393 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6394 6395 @t.overload 6396 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
6397 6398 def _parse_json_object(self, agg=False): 6399 star = self._parse_star() 6400 expressions = ( 6401 [star] 6402 if star 6403 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6404 ) 6405 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6406 6407 unique_keys = None 6408 if self._match_text_seq("WITH", "UNIQUE"): 6409 unique_keys = True 6410 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6411 unique_keys = False 6412 6413 self._match_text_seq("KEYS") 6414 6415 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6416 self._parse_type() 6417 ) 6418 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6419 6420 return self.expression( 6421 exp.JSONObjectAgg if agg else exp.JSONObject, 6422 expressions=expressions, 6423 null_handling=null_handling, 6424 unique_keys=unique_keys, 6425 return_type=return_type, 6426 encoding=encoding, 6427 ) 6428 6429 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6430 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6431 if not self._match_text_seq("NESTED"): 6432 this = self._parse_id_var() 6433 kind = self._parse_types(allow_identifiers=False) 6434 nested = None 6435 else: 6436 this = None 6437 kind = None 6438 nested = True 6439 6440 path = self._match_text_seq("PATH") and self._parse_string() 6441 nested_schema = nested and self._parse_json_schema() 6442 6443 return self.expression( 6444 exp.JSONColumnDef, 6445 this=this, 6446 kind=kind, 6447 path=path, 6448 nested_schema=nested_schema, 6449 ) 6450 6451 def _parse_json_schema(self) -> exp.JSONSchema: 6452 self._match_text_seq("COLUMNS") 6453 return self.expression( 6454 exp.JSONSchema, 6455 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6456 ) 6457 6458 def _parse_json_table(self) -> exp.JSONTable: 6459 this = self._parse_format_json(self._parse_bitwise()) 6460 path = self._match(TokenType.COMMA) and self._parse_string() 6461 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6462 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6463 schema = self._parse_json_schema() 6464 6465 return exp.JSONTable( 6466 this=this, 6467 schema=schema, 6468 path=path, 6469 error_handling=error_handling, 6470 empty_handling=empty_handling, 6471 ) 6472 6473 def _parse_match_against(self) -> exp.MatchAgainst: 6474 expressions = self._parse_csv(self._parse_column) 6475 6476 self._match_text_seq(")", "AGAINST", "(") 6477 6478 this = self._parse_string() 6479 6480 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6481 modifier = "IN NATURAL LANGUAGE MODE" 6482 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6483 modifier = f"{modifier} WITH QUERY EXPANSION" 6484 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6485 modifier = "IN BOOLEAN MODE" 6486 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6487 modifier = "WITH QUERY EXPANSION" 6488 else: 6489 modifier = None 6490 6491 return self.expression( 6492 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6493 ) 6494 6495 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6496 def _parse_open_json(self) -> exp.OpenJSON: 6497 this = self._parse_bitwise() 6498 path = self._match(TokenType.COMMA) and self._parse_string() 6499 6500 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6501 this = self._parse_field(any_token=True) 6502 kind = self._parse_types() 6503 path = 
self._parse_string() 6504 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6505 6506 return self.expression( 6507 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6508 ) 6509 6510 expressions = None 6511 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6512 self._match_l_paren() 6513 expressions = self._parse_csv(_parse_open_json_column_def) 6514 6515 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6516 6517 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6518 args = self._parse_csv(self._parse_bitwise) 6519 6520 if self._match(TokenType.IN): 6521 return self.expression( 6522 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6523 ) 6524 6525 if haystack_first: 6526 haystack = seq_get(args, 0) 6527 needle = seq_get(args, 1) 6528 else: 6529 haystack = seq_get(args, 1) 6530 needle = seq_get(args, 0) 6531 6532 return self.expression( 6533 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6534 ) 6535 6536 def _parse_predict(self) -> exp.Predict: 6537 self._match_text_seq("MODEL") 6538 this = self._parse_table() 6539 6540 self._match(TokenType.COMMA) 6541 self._match_text_seq("TABLE") 6542 6543 return self.expression( 6544 exp.Predict, 6545 this=this, 6546 expression=self._parse_table(), 6547 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6548 ) 6549 6550 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6551 args = self._parse_csv(self._parse_table) 6552 return exp.JoinHint(this=func_name.upper(), expressions=args) 6553 6554 def _parse_substring(self) -> exp.Substring: 6555 # Postgres supports the form: substring(string [from int] [for int]) 6556 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6557 6558 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6559 6560 if self._match(TokenType.FROM): 6561 args.append(self._parse_bitwise()) 6562 if self._match(TokenType.FOR): 6563 if len(args) == 1: 6564 args.append(exp.Literal.number(1)) 6565 args.append(self._parse_bitwise()) 6566 6567 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6568 6569 def _parse_trim(self) -> exp.Trim: 6570 # https://www.w3resource.com/sql/character-functions/trim.php 6571 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6572 6573 position = None 6574 collation = None 6575 expression = None 6576 6577 if self._match_texts(self.TRIM_TYPES): 6578 position = self._prev.text.upper() 6579 6580 this = self._parse_bitwise() 6581 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6582 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6583 expression = self._parse_bitwise() 6584 6585 if invert_order: 6586 this, expression = expression, this 6587 6588 if self._match(TokenType.COLLATE): 6589 collation = self._parse_bitwise() 6590 6591 return self.expression( 6592 exp.Trim, this=this, position=position, expression=expression, collation=collation 6593 ) 6594 6595 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6596 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6597 6598 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6599 return self._parse_window(self._parse_id_var(), alias=True) 6600 6601 def _parse_respect_or_ignore_nulls( 6602 self, this: t.Optional[exp.Expression] 6603 ) -> t.Optional[exp.Expression]: 6604 if self._match_text_seq("IGNORE", "NULLS"): 
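        # A small sketch (assumes sqlglot is importable) of the TRIM parsing
        # above: in the "TRIM(LEADING 'x' FROM col)" form the operands are
        # swapped, so `this` is always the trimmed expression and
        # `expression` the pattern.
        #
        #   >>> import sqlglot
        #   >>> trim = sqlglot.parse_one("SELECT TRIM(LEADING 'x' FROM col)", read="postgres").selects[0]
        #   >>> trim.this.sql(), trim.args.get("position")
        #   ('col', 'LEADING')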
6605 return self.expression(exp.IgnoreNulls, this=this) 6606 if self._match_text_seq("RESPECT", "NULLS"): 6607 return self.expression(exp.RespectNulls, this=this) 6608 return this 6609 6610 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6611 if self._match(TokenType.HAVING): 6612 self._match_texts(("MAX", "MIN")) 6613 max = self._prev.text.upper() != "MIN" 6614 return self.expression( 6615 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6616 ) 6617 6618 return this 6619 6620 def _parse_window( 6621 self, this: t.Optional[exp.Expression], alias: bool = False 6622 ) -> t.Optional[exp.Expression]: 6623 func = this 6624 comments = func.comments if isinstance(func, exp.Expression) else None 6625 6626 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6627 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6628 if self._match_text_seq("WITHIN", "GROUP"): 6629 order = self._parse_wrapped(self._parse_order) 6630 this = self.expression(exp.WithinGroup, this=this, expression=order) 6631 6632 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6633 self._match(TokenType.WHERE) 6634 this = self.expression( 6635 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6636 ) 6637 self._match_r_paren() 6638 6639 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6640 # Some dialects choose to implement and some do not. 6641 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6642 6643 # There is some code above in _parse_lambda that handles 6644 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6645 6646 # The below changes handle 6647 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6648 6649 # Oracle allows both formats 6650 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6651 # and Snowflake chose to do the same for familiarity 6652 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6653 if isinstance(this, exp.AggFunc): 6654 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6655 6656 if ignore_respect and ignore_respect is not this: 6657 ignore_respect.replace(ignore_respect.this) 6658 this = self.expression(ignore_respect.__class__, this=this) 6659 6660 this = self._parse_respect_or_ignore_nulls(this) 6661 6662 # bigquery select from window x AS (partition by ...) 
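        # An illustrative sketch (assumes sqlglot; whether the two renderings
        # are byte-identical may vary by version) of the rewrite above: the
        # "inside" and "outside" IGNORE NULLS spellings normalize to the same
        # shape of tree.
        #
        #   >>> import sqlglot
        #   >>> a = sqlglot.parse_one(
        #   ...     "SELECT FIRST_VALUE(x IGNORE NULLS) OVER (ORDER BY y) FROM t", read="snowflake"
        #   ... )
        #   >>> b = sqlglot.parse_one(
        #   ...     "SELECT FIRST_VALUE(x) IGNORE NULLS OVER (ORDER BY y) FROM t", read="snowflake"
        #   ... )
        #   >>> a.sql(dialect="snowflake") == b.sql(dialect="snowflake")
        #   True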
6663 if alias: 6664 over = None 6665 self._match(TokenType.ALIAS) 6666 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6667 return this 6668 else: 6669 over = self._prev.text.upper() 6670 6671 if comments and isinstance(func, exp.Expression): 6672 func.pop_comments() 6673 6674 if not self._match(TokenType.L_PAREN): 6675 return self.expression( 6676 exp.Window, 6677 comments=comments, 6678 this=this, 6679 alias=self._parse_id_var(False), 6680 over=over, 6681 ) 6682 6683 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6684 6685 first = self._match(TokenType.FIRST) 6686 if self._match_text_seq("LAST"): 6687 first = False 6688 6689 partition, order = self._parse_partition_and_order() 6690 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6691 6692 if kind: 6693 self._match(TokenType.BETWEEN) 6694 start = self._parse_window_spec() 6695 self._match(TokenType.AND) 6696 end = self._parse_window_spec() 6697 6698 spec = self.expression( 6699 exp.WindowSpec, 6700 kind=kind, 6701 start=start["value"], 6702 start_side=start["side"], 6703 end=end["value"], 6704 end_side=end["side"], 6705 ) 6706 else: 6707 spec = None 6708 6709 self._match_r_paren() 6710 6711 window = self.expression( 6712 exp.Window, 6713 comments=comments, 6714 this=this, 6715 partition_by=partition, 6716 order=order, 6717 spec=spec, 6718 alias=window_alias, 6719 over=over, 6720 first=first, 6721 ) 6722 6723 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6724 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6725 return self._parse_window(window, alias=alias) 6726 6727 return window 6728 6729 def _parse_partition_and_order( 6730 self, 6731 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6732 return self._parse_partition_by(), self._parse_order() 6733 6734 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6735 self._match(TokenType.BETWEEN) 6736 6737 return { 6738 "value": ( 6739 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6740 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6741 or self._parse_bitwise() 6742 ), 6743 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6744 } 6745 6746 def _parse_alias( 6747 self, this: t.Optional[exp.Expression], explicit: bool = False 6748 ) -> t.Optional[exp.Expression]: 6749 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 6750 # so this section tries to parse the clause version and if it fails, it treats the token 6751 # as an identifier (alias) 6752 if self._can_parse_limit_or_offset(): 6753 return this 6754 6755 any_token = self._match(TokenType.ALIAS) 6756 comments = self._prev_comments or [] 6757 6758 if explicit and not any_token: 6759 return this 6760 6761 if self._match(TokenType.L_PAREN): 6762 aliases = self.expression( 6763 exp.Aliases, 6764 comments=comments, 6765 this=this, 6766 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6767 ) 6768 self._match_r_paren(aliases) 6769 return aliases 6770 6771 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6772 self.STRING_ALIASES and self._parse_string_as_identifier() 6773 ) 6774 6775 if alias: 6776 comments.extend(alias.pop_comments()) 6777 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6778 column = this.this 6779 6780 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6781 if not this.comments and column and 
column.comments: 6782 this.comments = column.pop_comments() 6783 6784 return this 6785 6786 def _parse_id_var( 6787 self, 6788 any_token: bool = True, 6789 tokens: t.Optional[t.Collection[TokenType]] = None, 6790 ) -> t.Optional[exp.Expression]: 6791 expression = self._parse_identifier() 6792 if not expression and ( 6793 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6794 ): 6795 quoted = self._prev.token_type == TokenType.STRING 6796 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6797 6798 return expression 6799 6800 def _parse_string(self) -> t.Optional[exp.Expression]: 6801 if self._match_set(self.STRING_PARSERS): 6802 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6803 return self._parse_placeholder() 6804 6805 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6806 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6807 6808 def _parse_number(self) -> t.Optional[exp.Expression]: 6809 if self._match_set(self.NUMERIC_PARSERS): 6810 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6811 return self._parse_placeholder() 6812 6813 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6814 if self._match(TokenType.IDENTIFIER): 6815 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6816 return self._parse_placeholder() 6817 6818 def _parse_var( 6819 self, 6820 any_token: bool = False, 6821 tokens: t.Optional[t.Collection[TokenType]] = None, 6822 upper: bool = False, 6823 ) -> t.Optional[exp.Expression]: 6824 if ( 6825 (any_token and self._advance_any()) 6826 or self._match(TokenType.VAR) 6827 or (self._match_set(tokens) if tokens else False) 6828 ): 6829 return self.expression( 6830 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6831 ) 6832 return self._parse_placeholder() 6833 6834 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6835 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6836 self._advance() 6837 return self._prev 6838 return None 6839 6840 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6841 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6842 6843 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6844 return self._parse_primary() or self._parse_var(any_token=True) 6845 6846 def _parse_null(self) -> t.Optional[exp.Expression]: 6847 if self._match_set(self.NULL_TOKENS): 6848 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6849 return self._parse_placeholder() 6850 6851 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6852 if self._match(TokenType.TRUE): 6853 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6854 if self._match(TokenType.FALSE): 6855 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6856 return self._parse_placeholder() 6857 6858 def _parse_star(self) -> t.Optional[exp.Expression]: 6859 if self._match(TokenType.STAR): 6860 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6861 return self._parse_placeholder() 6862 6863 def _parse_parameter(self) -> exp.Parameter: 6864 this = self._parse_identifier() or self._parse_primary_or_var() 6865 return self.expression(exp.Parameter, this=this) 6866 6867 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6868 if self._match_set(self.PLACEHOLDER_PARSERS): 6869 placeholder = 
self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6870 if placeholder: 6871 return placeholder 6872 self._advance(-1) 6873 return None 6874 6875 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6876 if not self._match_texts(keywords): 6877 return None 6878 if self._match(TokenType.L_PAREN, advance=False): 6879 return self._parse_wrapped_csv(self._parse_expression) 6880 6881 expression = self._parse_expression() 6882 return [expression] if expression else None 6883 6884 def _parse_csv( 6885 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6886 ) -> t.List[exp.Expression]: 6887 parse_result = parse_method() 6888 items = [parse_result] if parse_result is not None else [] 6889 6890 while self._match(sep): 6891 self._add_comments(parse_result) 6892 parse_result = parse_method() 6893 if parse_result is not None: 6894 items.append(parse_result) 6895 6896 return items 6897 6898 def _parse_tokens( 6899 self, parse_method: t.Callable, expressions: t.Dict 6900 ) -> t.Optional[exp.Expression]: 6901 this = parse_method() 6902 6903 while self._match_set(expressions): 6904 this = self.expression( 6905 expressions[self._prev.token_type], 6906 this=this, 6907 comments=self._prev_comments, 6908 expression=parse_method(), 6909 ) 6910 6911 return this 6912 6913 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6914 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6915 6916 def _parse_wrapped_csv( 6917 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6918 ) -> t.List[exp.Expression]: 6919 return self._parse_wrapped( 6920 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6921 ) 6922 6923 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6924 wrapped = self._match(TokenType.L_PAREN) 6925 if not wrapped and not optional: 6926 self.raise_error("Expecting (") 6927 parse_result = parse_method() 6928 if wrapped: 6929 self._match_r_paren() 6930 return parse_result 6931 6932 def _parse_expressions(self) -> t.List[exp.Expression]: 6933 return self._parse_csv(self._parse_expression) 6934 6935 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6936 return self._parse_select() or self._parse_set_operations( 6937 self._parse_alias(self._parse_assignment(), explicit=True) 6938 if alias 6939 else self._parse_assignment() 6940 ) 6941 6942 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6943 return self._parse_query_modifiers( 6944 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6945 ) 6946 6947 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6948 this = None 6949 if self._match_texts(self.TRANSACTION_KIND): 6950 this = self._prev.text 6951 6952 self._match_texts(("TRANSACTION", "WORK")) 6953 6954 modes = [] 6955 while True: 6956 mode = [] 6957 while self._match(TokenType.VAR): 6958 mode.append(self._prev.text) 6959 6960 if mode: 6961 modes.append(" ".join(mode)) 6962 if not self._match(TokenType.COMMA): 6963 break 6964 6965 return self.expression(exp.Transaction, this=this, modes=modes) 6966 6967 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6968 chain = None 6969 savepoint = None 6970 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6971 6972 self._match_texts(("TRANSACTION", "WORK")) 6973 6974 if self._match_text_seq("TO"): 6975 self._match_text_seq("SAVEPOINT") 6976 savepoint = self._parse_id_var() 
6977 6978 if self._match(TokenType.AND): 6979 chain = not self._match_text_seq("NO") 6980 self._match_text_seq("CHAIN") 6981 6982 if is_rollback: 6983 return self.expression(exp.Rollback, savepoint=savepoint) 6984 6985 return self.expression(exp.Commit, chain=chain) 6986 6987 def _parse_refresh(self) -> exp.Refresh: 6988 self._match(TokenType.TABLE) 6989 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 6990 6991 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6992 if not self._match_text_seq("ADD"): 6993 return None 6994 6995 self._match(TokenType.COLUMN) 6996 exists_column = self._parse_exists(not_=True) 6997 expression = self._parse_field_def() 6998 6999 if expression: 7000 expression.set("exists", exists_column) 7001 7002 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7003 if self._match_texts(("FIRST", "AFTER")): 7004 position = self._prev.text 7005 column_position = self.expression( 7006 exp.ColumnPosition, this=self._parse_column(), position=position 7007 ) 7008 expression.set("position", column_position) 7009 7010 return expression 7011 7012 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7013 drop = self._match(TokenType.DROP) and self._parse_drop() 7014 if drop and not isinstance(drop, exp.Command): 7015 drop.set("kind", drop.args.get("kind", "COLUMN")) 7016 return drop 7017 7018 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7019 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7020 return self.expression( 7021 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7022 ) 7023 7024 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7025 index = self._index - 1 7026 7027 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7028 return self._parse_csv( 7029 lambda: self.expression( 7030 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7031 ) 7032 ) 7033 7034 self._retreat(index) 7035 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 7036 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 7037 7038 if self._match_text_seq("ADD", "COLUMNS"): 7039 schema = self._parse_schema() 7040 if schema: 7041 return [schema] 7042 return [] 7043 7044 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 7045 7046 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7047 if self._match_texts(self.ALTER_ALTER_PARSERS): 7048 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7049 7050 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7051 # keyword after ALTER we default to parsing this statement 7052 self._match(TokenType.COLUMN) 7053 column = self._parse_field(any_token=True) 7054 7055 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7056 return self.expression(exp.AlterColumn, this=column, drop=True) 7057 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7058 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7059 if self._match(TokenType.COMMENT): 7060 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7061 if self._match_text_seq("DROP", "NOT", "NULL"): 7062 return self.expression( 7063 exp.AlterColumn, 7064 this=column, 7065 drop=True, 7066 allow_null=True, 7067 ) 7068 if self._match_text_seq("SET", "NOT", "NULL"): 7069 return self.expression( 7070 
exp.AlterColumn, 7071 this=column, 7072 allow_null=False, 7073 ) 7074 self._match_text_seq("SET", "DATA") 7075 self._match_text_seq("TYPE") 7076 return self.expression( 7077 exp.AlterColumn, 7078 this=column, 7079 dtype=self._parse_types(), 7080 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7081 using=self._match(TokenType.USING) and self._parse_assignment(), 7082 ) 7083 7084 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7085 if self._match_texts(("ALL", "EVEN", "AUTO")): 7086 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7087 7088 self._match_text_seq("KEY", "DISTKEY") 7089 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7090 7091 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7092 if compound: 7093 self._match_text_seq("SORTKEY") 7094 7095 if self._match(TokenType.L_PAREN, advance=False): 7096 return self.expression( 7097 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7098 ) 7099 7100 self._match_texts(("AUTO", "NONE")) 7101 return self.expression( 7102 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7103 ) 7104 7105 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7106 index = self._index - 1 7107 7108 partition_exists = self._parse_exists() 7109 if self._match(TokenType.PARTITION, advance=False): 7110 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7111 7112 self._retreat(index) 7113 return self._parse_csv(self._parse_drop_column) 7114 7115 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7116 if self._match(TokenType.COLUMN): 7117 exists = self._parse_exists() 7118 old_column = self._parse_column() 7119 to = self._match_text_seq("TO") 7120 new_column = self._parse_column() 7121 7122 if old_column is None or to is None or new_column is None: 7123 return None 7124 7125 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7126 7127 self._match_text_seq("TO") 7128 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7129 7130 def _parse_alter_table_set(self) -> exp.AlterSet: 7131 alter_set = self.expression(exp.AlterSet) 7132 7133 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7134 "TABLE", "PROPERTIES" 7135 ): 7136 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7137 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7138 alter_set.set("expressions", [self._parse_assignment()]) 7139 elif self._match_texts(("LOGGED", "UNLOGGED")): 7140 alter_set.set("option", exp.var(self._prev.text.upper())) 7141 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7142 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7143 elif self._match_text_seq("LOCATION"): 7144 alter_set.set("location", self._parse_field()) 7145 elif self._match_text_seq("ACCESS", "METHOD"): 7146 alter_set.set("access_method", self._parse_field()) 7147 elif self._match_text_seq("TABLESPACE"): 7148 alter_set.set("tablespace", self._parse_field()) 7149 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7150 alter_set.set("file_format", [self._parse_field()]) 7151 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7152 alter_set.set("file_format", self._parse_wrapped_options()) 7153 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7154 alter_set.set("copy_options", 
self._parse_wrapped_options()) 7155 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7156 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7157 else: 7158 if self._match_text_seq("SERDE"): 7159 alter_set.set("serde", self._parse_field()) 7160 7161 alter_set.set("expressions", [self._parse_properties()]) 7162 7163 return alter_set 7164 7165 def _parse_alter(self) -> exp.Alter | exp.Command: 7166 start = self._prev 7167 7168 alter_token = self._match_set(self.ALTERABLES) and self._prev 7169 if not alter_token: 7170 return self._parse_as_command(start) 7171 7172 exists = self._parse_exists() 7173 only = self._match_text_seq("ONLY") 7174 this = self._parse_table(schema=True) 7175 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7176 7177 if self._next: 7178 self._advance() 7179 7180 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7181 if parser: 7182 actions = ensure_list(parser(self)) 7183 not_valid = self._match_text_seq("NOT", "VALID") 7184 options = self._parse_csv(self._parse_property) 7185 7186 if not self._curr and actions: 7187 return self.expression( 7188 exp.Alter, 7189 this=this, 7190 kind=alter_token.text.upper(), 7191 exists=exists, 7192 actions=actions, 7193 only=only, 7194 options=options, 7195 cluster=cluster, 7196 not_valid=not_valid, 7197 ) 7198 7199 return self._parse_as_command(start) 7200 7201 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7202 start = self._prev 7203 # https://duckdb.org/docs/sql/statements/analyze 7204 if not self._curr: 7205 return self.expression(exp.Analyze) 7206 7207 options = [] 7208 while self._match_texts(self.ANALYZE_STYLES): 7209 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7210 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7211 else: 7212 options.append(self._prev.text.upper()) 7213 7214 this: t.Optional[exp.Expression] = None 7215 inner_expression: t.Optional[exp.Expression] = None 7216 7217 kind = self._curr and self._curr.text.upper() 7218 7219 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7220 this = self._parse_table_parts() 7221 elif self._match_text_seq("TABLES"): 7222 if self._match_set((TokenType.FROM, TokenType.IN)): 7223 kind = f"{kind} {self._prev.text.upper()}" 7224 this = self._parse_table(schema=True, is_db_reference=True) 7225 elif self._match_text_seq("DATABASE"): 7226 this = self._parse_table(schema=True, is_db_reference=True) 7227 elif self._match_text_seq("CLUSTER"): 7228 this = self._parse_table() 7229 # Try matching inner expr keywords before fallback to parse table. 
7230 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7231 kind = None 7232 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7233 else: 7234 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7235 kind = None 7236 this = self._parse_table_parts() 7237 7238 partition = self._try_parse(self._parse_partition) 7239 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7240 return self._parse_as_command(start) 7241 7242 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7243 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7244 "WITH", "ASYNC", "MODE" 7245 ): 7246 mode = f"WITH {self._tokens[self._index-2].text.upper()} MODE" 7247 else: 7248 mode = None 7249 7250 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7251 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7252 7253 properties = self._parse_properties() 7254 return self.expression( 7255 exp.Analyze, 7256 kind=kind, 7257 this=this, 7258 mode=mode, 7259 partition=partition, 7260 properties=properties, 7261 expression=inner_expression, 7262 options=options, 7263 ) 7264 7265 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7266 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7267 this = None 7268 kind = self._prev.text.upper() 7269 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7270 expressions = [] 7271 7272 if not self._match_text_seq("STATISTICS"): 7273 self.raise_error("Expecting token STATISTICS") 7274 7275 if self._match_text_seq("NOSCAN"): 7276 this = "NOSCAN" 7277 elif self._match(TokenType.FOR): 7278 if self._match_text_seq("ALL", "COLUMNS"): 7279 this = "FOR ALL COLUMNS" 7280 if self._match_texts("COLUMNS"): 7281 this = "FOR COLUMNS" 7282 expressions = self._parse_csv(self._parse_column_reference) 7283 elif self._match_text_seq("SAMPLE"): 7284 sample = self._parse_number() 7285 expressions = [ 7286 self.expression( 7287 exp.AnalyzeSample, 7288 sample=sample, 7289 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7290 ) 7291 ] 7292 7293 return self.expression( 7294 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7295 ) 7296 7297 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7298 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7299 kind = None 7300 this = None 7301 expression: t.Optional[exp.Expression] = None 7302 if self._match_text_seq("REF", "UPDATE"): 7303 kind = "REF" 7304 this = "UPDATE" 7305 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7306 this = "UPDATE SET DANGLING TO NULL" 7307 elif self._match_text_seq("STRUCTURE"): 7308 kind = "STRUCTURE" 7309 if self._match_text_seq("CASCADE", "FAST"): 7310 this = "CASCADE FAST" 7311 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7312 ("ONLINE", "OFFLINE") 7313 ): 7314 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7315 expression = self._parse_into() 7316 7317 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7318 7319 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7320 this = self._prev.text.upper() 7321 if self._match_text_seq("COLUMNS"): 7322 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7323 return None 7324 7325 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7326 kind = 
self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7327 if self._match_text_seq("STATISTICS"): 7328 return self.expression(exp.AnalyzeDelete, kind=kind) 7329 return None 7330 7331 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7332 if self._match_text_seq("CHAINED", "ROWS"): 7333 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7334 return None 7335 7336 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7337 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7338 this = self._prev.text.upper() 7339 expression: t.Optional[exp.Expression] = None 7340 expressions = [] 7341 update_options = None 7342 7343 if self._match_text_seq("HISTOGRAM", "ON"): 7344 expressions = self._parse_csv(self._parse_column_reference) 7345 with_expressions = [] 7346 while self._match(TokenType.WITH): 7347 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7348 if self._match_texts(("SYNC", "ASYNC")): 7349 if self._match_text_seq("MODE", advance=False): 7350 with_expressions.append(f"{self._prev.text.upper()} MODE") 7351 self._advance() 7352 else: 7353 buckets = self._parse_number() 7354 if self._match_text_seq("BUCKETS"): 7355 with_expressions.append(f"{buckets} BUCKETS") 7356 if with_expressions: 7357 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7358 7359 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7360 TokenType.UPDATE, advance=False 7361 ): 7362 update_options = self._prev.text.upper() 7363 self._advance() 7364 elif self._match_text_seq("USING", "DATA"): 7365 expression = self.expression(exp.UsingData, this=self._parse_string()) 7366 7367 return self.expression( 7368 exp.AnalyzeHistogram, 7369 this=this, 7370 expressions=expressions, 7371 expression=expression, 7372 update_options=update_options, 7373 ) 7374 7375 def _parse_merge(self) -> exp.Merge: 7376 self._match(TokenType.INTO) 7377 target = self._parse_table() 7378 7379 if target and self._match(TokenType.ALIAS, advance=False): 7380 target.set("alias", self._parse_table_alias()) 7381 7382 self._match(TokenType.USING) 7383 using = self._parse_table() 7384 7385 self._match(TokenType.ON) 7386 on = self._parse_assignment() 7387 7388 return self.expression( 7389 exp.Merge, 7390 this=target, 7391 using=using, 7392 on=on, 7393 whens=self._parse_when_matched(), 7394 returning=self._parse_returning(), 7395 ) 7396 7397 def _parse_when_matched(self) -> exp.Whens: 7398 whens = [] 7399 7400 while self._match(TokenType.WHEN): 7401 matched = not self._match(TokenType.NOT) 7402 self._match_text_seq("MATCHED") 7403 source = ( 7404 False 7405 if self._match_text_seq("BY", "TARGET") 7406 else self._match_text_seq("BY", "SOURCE") 7407 ) 7408 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7409 7410 self._match(TokenType.THEN) 7411 7412 if self._match(TokenType.INSERT): 7413 this = self._parse_star() 7414 if this: 7415 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7416 else: 7417 then = self.expression( 7418 exp.Insert, 7419 this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(), 7420 expression=self._match_text_seq("VALUES") and self._parse_value(), 7421 ) 7422 elif self._match(TokenType.UPDATE): 7423 expressions = self._parse_star() 7424 if expressions: 7425 then = self.expression(exp.Update, expressions=expressions) 7426 else: 7427 then = self.expression( 7428 exp.Update, 7429 expressions=self._match(TokenType.SET) 7430 and 
self._parse_csv(self._parse_equality), 7431 ) 7432 elif self._match(TokenType.DELETE): 7433 then = self.expression(exp.Var, this=self._prev.text) 7434 else: 7435 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7436 7437 whens.append( 7438 self.expression( 7439 exp.When, 7440 matched=matched, 7441 source=source, 7442 condition=condition, 7443 then=then, 7444 ) 7445 ) 7446 return self.expression(exp.Whens, expressions=whens) 7447 7448 def _parse_show(self) -> t.Optional[exp.Expression]: 7449 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7450 if parser: 7451 return parser(self) 7452 return self._parse_as_command(self._prev) 7453 7454 def _parse_set_item_assignment( 7455 self, kind: t.Optional[str] = None 7456 ) -> t.Optional[exp.Expression]: 7457 index = self._index 7458 7459 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7460 return self._parse_set_transaction(global_=kind == "GLOBAL") 7461 7462 left = self._parse_primary() or self._parse_column() 7463 assignment_delimiter = self._match_texts(("=", "TO")) 7464 7465 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7466 self._retreat(index) 7467 return None 7468 7469 right = self._parse_statement() or self._parse_id_var() 7470 if isinstance(right, (exp.Column, exp.Identifier)): 7471 right = exp.var(right.name) 7472 7473 this = self.expression(exp.EQ, this=left, expression=right) 7474 return self.expression(exp.SetItem, this=this, kind=kind) 7475 7476 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7477 self._match_text_seq("TRANSACTION") 7478 characteristics = self._parse_csv( 7479 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7480 ) 7481 return self.expression( 7482 exp.SetItem, 7483 expressions=characteristics, 7484 kind="TRANSACTION", 7485 **{"global": global_}, # type: ignore 7486 ) 7487 7488 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7489 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7490 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7491 7492 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7493 index = self._index 7494 set_ = self.expression( 7495 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7496 ) 7497 7498 if self._curr: 7499 self._retreat(index) 7500 return self._parse_as_command(self._prev) 7501 7502 return set_ 7503 7504 def _parse_var_from_options( 7505 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7506 ) -> t.Optional[exp.Var]: 7507 start = self._curr 7508 if not start: 7509 return None 7510 7511 option = start.text.upper() 7512 continuations = options.get(option) 7513 7514 index = self._index 7515 self._advance() 7516 for keywords in continuations or []: 7517 if isinstance(keywords, str): 7518 keywords = (keywords,) 7519 7520 if self._match_text_seq(*keywords): 7521 option = f"{option} {' '.join(keywords)}" 7522 break 7523 else: 7524 if continuations or continuations is None: 7525 if raise_unmatched: 7526 self.raise_error(f"Unknown option {option}") 7527 7528 self._retreat(index) 7529 return None 7530 7531 return exp.var(option) 7532 7533 def _parse_as_command(self, start: Token) -> exp.Command: 7534 while self._curr: 7535 self._advance() 7536 text = self._find_sql(start, self._prev) 7537 size = len(start.text) 7538 self._warn_unsupported() 7539 return exp.Command(this=text[:size], expression=text[size:]) 7540 7541 def _parse_dict_property(self, 
this: str) -> exp.DictProperty: 7542 settings = [] 7543 7544 self._match_l_paren() 7545 kind = self._parse_id_var() 7546 7547 if self._match(TokenType.L_PAREN): 7548 while True: 7549 key = self._parse_id_var() 7550 value = self._parse_primary() 7551 if not key and value is None: 7552 break 7553 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7554 self._match(TokenType.R_PAREN) 7555 7556 self._match_r_paren() 7557 7558 return self.expression( 7559 exp.DictProperty, 7560 this=this, 7561 kind=kind.this if kind else None, 7562 settings=settings, 7563 ) 7564 7565 def _parse_dict_range(self, this: str) -> exp.DictRange: 7566 self._match_l_paren() 7567 has_min = self._match_text_seq("MIN") 7568 if has_min: 7569 min = self._parse_var() or self._parse_primary() 7570 self._match_text_seq("MAX") 7571 max = self._parse_var() or self._parse_primary() 7572 else: 7573 max = self._parse_var() or self._parse_primary() 7574 min = exp.Literal.number(0) 7575 self._match_r_paren() 7576 return self.expression(exp.DictRange, this=this, min=min, max=max) 7577 7578 def _parse_comprehension( 7579 self, this: t.Optional[exp.Expression] 7580 ) -> t.Optional[exp.Comprehension]: 7581 index = self._index 7582 expression = self._parse_column() 7583 if not self._match(TokenType.IN): 7584 self._retreat(index - 1) 7585 return None 7586 iterator = self._parse_column() 7587 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7588 return self.expression( 7589 exp.Comprehension, 7590 this=this, 7591 expression=expression, 7592 iterator=iterator, 7593 condition=condition, 7594 ) 7595 7596 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7597 if self._match(TokenType.HEREDOC_STRING): 7598 return self.expression(exp.Heredoc, this=self._prev.text) 7599 7600 if not self._match_text_seq("$"): 7601 return None 7602 7603 tags = ["$"] 7604 tag_text = None 7605 7606 if self._is_connected(): 7607 self._advance() 7608 tags.append(self._prev.text.upper()) 7609 else: 7610 self.raise_error("No closing $ found") 7611 7612 if tags[-1] != "$": 7613 if self._is_connected() and self._match_text_seq("$"): 7614 tag_text = tags[-1] 7615 tags.append("$") 7616 else: 7617 self.raise_error("No closing $ found") 7618 7619 heredoc_start = self._curr 7620 7621 while self._curr: 7622 if self._match_text_seq(*tags, advance=False): 7623 this = self._find_sql(heredoc_start, self._prev) 7624 self._advance(len(tags)) 7625 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7626 7627 self._advance() 7628 7629 self.raise_error(f"No closing {''.join(tags)} found") 7630 return None 7631 7632 def _find_parser( 7633 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7634 ) -> t.Optional[t.Callable]: 7635 if not self._curr: 7636 return None 7637 7638 index = self._index 7639 this = [] 7640 while True: 7641 # The current token might be multiple words 7642 curr = self._curr.text.upper() 7643 key = curr.split(" ") 7644 this.append(curr) 7645 7646 self._advance() 7647 result, trie = in_trie(trie, key) 7648 if result == TrieResult.FAILED: 7649 break 7650 7651 if result == TrieResult.EXISTS: 7652 subparser = parsers[" ".join(this)] 7653 return subparser 7654 7655 self._retreat(index) 7656 return None 7657 7658 def _match(self, token_type, advance=True, expression=None): 7659 if not self._curr: 7660 return None 7661 7662 if self._curr.token_type == token_type: 7663 if advance: 7664 self._advance() 7665 self._add_comments(expression) 7666 return True 7667 7668 return None 7669 7670 def _match_set(self, types, 
advance=True): 7671 if not self._curr: 7672 return None 7673 7674 if self._curr.token_type in types: 7675 if advance: 7676 self._advance() 7677 return True 7678 7679 return None 7680 7681 def _match_pair(self, token_type_a, token_type_b, advance=True): 7682 if not self._curr or not self._next: 7683 return None 7684 7685 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7686 if advance: 7687 self._advance(2) 7688 return True 7689 7690 return None 7691 7692 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7693 if not self._match(TokenType.L_PAREN, expression=expression): 7694 self.raise_error("Expecting (") 7695 7696 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7697 if not self._match(TokenType.R_PAREN, expression=expression): 7698 self.raise_error("Expecting )") 7699 7700 def _match_texts(self, texts, advance=True): 7701 if ( 7702 self._curr 7703 and self._curr.token_type != TokenType.STRING 7704 and self._curr.text.upper() in texts 7705 ): 7706 if advance: 7707 self._advance() 7708 return True 7709 return None 7710 7711 def _match_text_seq(self, *texts, advance=True): 7712 index = self._index 7713 for text in texts: 7714 if ( 7715 self._curr 7716 and self._curr.token_type != TokenType.STRING 7717 and self._curr.text.upper() == text 7718 ): 7719 self._advance() 7720 else: 7721 self._retreat(index) 7722 return None 7723 7724 if not advance: 7725 self._retreat(index) 7726 7727 return True 7728 7729 def _replace_lambda( 7730 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7731 ) -> t.Optional[exp.Expression]: 7732 if not node: 7733 return node 7734 7735 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7736 7737 for column in node.find_all(exp.Column): 7738 typ = lambda_types.get(column.parts[0].name) 7739 if typ is not None: 7740 dot_or_id = column.to_dot() if column.table else column.this 7741 7742 if typ: 7743 dot_or_id = self.expression( 7744 exp.Cast, 7745 this=dot_or_id, 7746 to=typ, 7747 ) 7748 7749 parent = column.parent 7750 7751 while isinstance(parent, exp.Dot): 7752 if not isinstance(parent.parent, exp.Dot): 7753 parent.replace(dot_or_id) 7754 break 7755 parent = parent.parent 7756 else: 7757 if column is node: 7758 node = dot_or_id 7759 else: 7760 column.replace(dot_or_id) 7761 return node 7762 7763 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7764 start = self._prev 7765 7766 # Not to be confused with TRUNCATE(number, decimals) function call 7767 if self._match(TokenType.L_PAREN): 7768 self._retreat(self._index - 2) 7769 return self._parse_function() 7770 7771 # Clickhouse supports TRUNCATE DATABASE as well 7772 is_database = self._match(TokenType.DATABASE) 7773 7774 self._match(TokenType.TABLE) 7775 7776 exists = self._parse_exists(not_=False) 7777 7778 expressions = self._parse_csv( 7779 lambda: self._parse_table(schema=True, is_db_reference=is_database) 7780 ) 7781 7782 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7783 7784 if self._match_text_seq("RESTART", "IDENTITY"): 7785 identity = "RESTART" 7786 elif self._match_text_seq("CONTINUE", "IDENTITY"): 7787 identity = "CONTINUE" 7788 else: 7789 identity = None 7790 7791 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7792 option = self._prev.text 7793 else: 7794 option = None 7795 7796 partition = self._parse_partition() 7797 7798 # Fallback case 7799 if self._curr: 7800 return 
self._parse_as_command(start) 7801 7802 return self.expression( 7803 exp.TruncateTable, 7804 expressions=expressions, 7805 is_database=is_database, 7806 exists=exists, 7807 cluster=cluster, 7808 identity=identity, 7809 option=option, 7810 partition=partition, 7811 ) 7812 7813 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7814 this = self._parse_ordered(self._parse_opclass) 7815 7816 if not self._match(TokenType.WITH): 7817 return this 7818 7819 op = self._parse_var(any_token=True) 7820 7821 return self.expression(exp.WithOperator, this=this, op=op) 7822 7823 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 7824 self._match(TokenType.EQ) 7825 self._match(TokenType.L_PAREN) 7826 7827 opts: t.List[t.Optional[exp.Expression]] = [] 7828 while self._curr and not self._match(TokenType.R_PAREN): 7829 if self._match_text_seq("FORMAT_NAME", "="): 7830 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 7831 # so we parse it separately to use _parse_field() 7832 prop = self.expression( 7833 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 7834 ) 7835 opts.append(prop) 7836 else: 7837 opts.append(self._parse_property()) 7838 7839 self._match(TokenType.COMMA) 7840 7841 return opts 7842 7843 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 7844 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 7845 7846 options = [] 7847 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 7848 option = self._parse_var(any_token=True) 7849 prev = self._prev.text.upper() 7850 7851 # Different dialects might separate options and values by white space, "=" and "AS" 7852 self._match(TokenType.EQ) 7853 self._match(TokenType.ALIAS) 7854 7855 param = self.expression(exp.CopyParameter, this=option) 7856 7857 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 7858 TokenType.L_PAREN, advance=False 7859 ): 7860 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 7861 param.set("expressions", self._parse_wrapped_options()) 7862 elif prev == "FILE_FORMAT": 7863 # T-SQL's external file format case 7864 param.set("expression", self._parse_field()) 7865 else: 7866 param.set("expression", self._parse_unquoted_field()) 7867 7868 options.append(param) 7869 self._match(sep) 7870 7871 return options 7872 7873 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 7874 expr = self.expression(exp.Credentials) 7875 7876 if self._match_text_seq("STORAGE_INTEGRATION", "="): 7877 expr.set("storage", self._parse_field()) 7878 if self._match_text_seq("CREDENTIALS"): 7879 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 7880 creds = ( 7881 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 7882 ) 7883 expr.set("credentials", creds) 7884 if self._match_text_seq("ENCRYPTION"): 7885 expr.set("encryption", self._parse_wrapped_options()) 7886 if self._match_text_seq("IAM_ROLE"): 7887 expr.set("iam_role", self._parse_field()) 7888 if self._match_text_seq("REGION"): 7889 expr.set("region", self._parse_field()) 7890 7891 return expr 7892 7893 def _parse_file_location(self) -> t.Optional[exp.Expression]: 7894 return self._parse_field() 7895 7896 def _parse_copy(self) -> exp.Copy | exp.Command: 7897 start = self._prev 7898 7899 self._match(TokenType.INTO) 7900 7901 this = ( 7902 self._parse_select(nested=True, parse_subquery_alias=False) 7903 if self._match(TokenType.L_PAREN, advance=False) 7904 else self._parse_table(schema=True) 7905 ) 7906 7907 
kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 7908 7909 files = self._parse_csv(self._parse_file_location) 7910 credentials = self._parse_credentials() 7911 7912 self._match_text_seq("WITH") 7913 7914 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 7915 7916 # Fallback case 7917 if self._curr: 7918 return self._parse_as_command(start) 7919 7920 return self.expression( 7921 exp.Copy, 7922 this=this, 7923 kind=kind, 7924 credentials=credentials, 7925 files=files, 7926 params=params, 7927 ) 7928 7929 def _parse_normalize(self) -> exp.Normalize: 7930 return self.expression( 7931 exp.Normalize, 7932 this=self._parse_bitwise(), 7933 form=self._match(TokenType.COMMA) and self._parse_var(), 7934 ) 7935 7936 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 7937 args = self._parse_csv(lambda: self._parse_lambda()) 7938 7939 this = seq_get(args, 0) 7940 decimals = seq_get(args, 1) 7941 7942 return expr_type( 7943 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 7944 ) 7945 7946 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 7947 if self._match_text_seq("COLUMNS", "(", advance=False): 7948 this = self._parse_function() 7949 if isinstance(this, exp.Columns): 7950 this.set("unpack", True) 7951 return this 7952 7953 return self.expression( 7954 exp.Star, 7955 **{ # type: ignore 7956 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 7957 "replace": self._parse_star_op("REPLACE"), 7958 "rename": self._parse_star_op("RENAME"), 7959 }, 7960 ) 7961 7962 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 7963 privilege_parts = [] 7964 7965 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 7966 # (end of privilege list) or L_PAREN (start of column list) are met 7967 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 7968 privilege_parts.append(self._curr.text.upper()) 7969 self._advance() 7970 7971 this = exp.var(" ".join(privilege_parts)) 7972 expressions = ( 7973 self._parse_wrapped_csv(self._parse_column) 7974 if self._match(TokenType.L_PAREN, advance=False) 7975 else None 7976 ) 7977 7978 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 7979 7980 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 7981 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 7982 principal = self._parse_id_var() 7983 7984 if not principal: 7985 return None 7986 7987 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 7988 7989 def _parse_grant(self) -> exp.Grant | exp.Command: 7990 start = self._prev 7991 7992 privileges = self._parse_csv(self._parse_grant_privilege) 7993 7994 self._match(TokenType.ON) 7995 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 7996 7997 # Attempt to parse the securable e.g. 
MySQL allows names 7998 # such as "foo.*", "*.*" which are not easily parseable yet 7999 securable = self._try_parse(self._parse_table_parts) 8000 8001 if not securable or not self._match_text_seq("TO"): 8002 return self._parse_as_command(start) 8003 8004 principals = self._parse_csv(self._parse_grant_principal) 8005 8006 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8007 8008 if self._curr: 8009 return self._parse_as_command(start) 8010 8011 return self.expression( 8012 exp.Grant, 8013 privileges=privileges, 8014 kind=kind, 8015 securable=securable, 8016 principals=principals, 8017 grant_option=grant_option, 8018 ) 8019 8020 def _parse_overlay(self) -> exp.Overlay: 8021 return self.expression( 8022 exp.Overlay, 8023 **{ # type: ignore 8024 "this": self._parse_bitwise(), 8025 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8026 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8027 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8028 }, 8029 )
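The statement-level helpers above are easiest to verify from the outside, through the public parse API. A minimal sketch (node names match the methods shown; exact argument shapes can vary across sqlglot versions):

    import sqlglot
    from sqlglot import exp

    # MERGE is routed through _parse_merge/_parse_when_matched
    merge = sqlglot.parse_one(
        "MERGE INTO t USING s ON t.id = s.id "
        "WHEN MATCHED THEN UPDATE SET t.v = s.v"
    )
    assert isinstance(merge, exp.Merge)

    # TRUNCATE is routed through _parse_truncate_table; RESTART IDENTITY
    # lands in the `identity` arg, per the method above
    trunc = sqlglot.parse_one("TRUNCATE TABLE t1, t2 RESTART IDENTITY")
    assert isinstance(trunc, exp.TruncateTable)
    assert trunc.args["identity"] == "RESTART"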
102def build_mod(args: t.List) -> exp.Mod: 103 this = seq_get(args, 0) 104 expression = seq_get(args, 1) 105 106 # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7 107 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 108 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 109 110 return exp.Mod(this=this, expression=expression)
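As its comment notes, build_mod parenthesizes binary operands so that operator precedence survives the rewrite from the MOD function into the % operator. A quick sketch against the default dialect, which renders exp.Mod as %:

    import sqlglot

    # The Add node is wrapped in exp.Paren, so the grouping is preserved
    print(sqlglot.parse_one("MOD(a + 1, 7)").sql())  # (a + 1) % 7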
122def build_array_constructor( 123 exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect 124) -> exp.Expression: 125 array_exp = exp_class(expressions=args) 126 127 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 128 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 129 130 return array_exp
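build_array_constructor only records which bracket style produced the array when the dialect declares HAS_DISTINCT_ARRAY_CONSTRUCTORS; otherwise the flag is left unset. A sketch calling the builder directly (the parser normally does this internally):

    from sqlglot import exp
    from sqlglot.dialects.dialect import Dialect
    from sqlglot.parser import build_array_constructor
    from sqlglot.tokens import TokenType

    args = [exp.Literal.number(1), exp.Literal.number(2)]
    arr = build_array_constructor(exp.Array, args, TokenType.L_BRACKET, Dialect())

    # The base Dialect does not set HAS_DISTINCT_ARRAY_CONSTRUCTORS, so
    # bracket_notation stays unset here
    print(arr.args.get("bracket_notation"))  # None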
133def build_convert_timezone( 134 args: t.List, default_source_tz: t.Optional[str] = None 135) -> t.Union[exp.ConvertTimezone, exp.Anonymous]: 136 if len(args) == 2: 137 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 138 return exp.ConvertTimezone( 139 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 140 ) 141 142 return exp.ConvertTimezone.from_arg_list(args)
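With only two arguments, build_convert_timezone injects the dialect-provided default source timezone (if any) as a string literal; with three it defers to ConvertTimezone.from_arg_list. A sketch, where the "UTC" default stands in for whatever a dialect might pass:

    from sqlglot import exp
    from sqlglot.parser import build_convert_timezone

    node = build_convert_timezone(
        [exp.Literal.string("America/New_York"), exp.column("ts")],
        default_source_tz="UTC",  # hypothetical dialect default
    )
    print(node.args["source_tz"])  # 'UTC'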
175class Parser(metaclass=_Parser): 176 """ 177 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 178 179 Args: 180 error_level: The desired error level. 181 Default: ErrorLevel.IMMEDIATE 182 error_message_context: The amount of context to capture from a query string when displaying 183 the error message (in number of characters). 184 Default: 100 185 max_errors: Maximum number of error messages to include in a raised ParseError. 186 This is only relevant if error_level is ErrorLevel.RAISE. 187 Default: 3 188 """ 189 190 FUNCTIONS: t.Dict[str, t.Callable] = { 191 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 192 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 193 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 194 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 195 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 196 ), 197 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 198 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 199 ), 200 "CHAR": lambda args: exp.Chr(expressions=args), 201 "CHR": lambda args: exp.Chr(expressions=args), 202 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 203 "CONCAT": lambda args, dialect: exp.Concat( 204 expressions=args, 205 safe=not dialect.STRICT_STRING_CONCAT, 206 coalesce=dialect.CONCAT_COALESCE, 207 ), 208 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 209 expressions=args, 210 safe=not dialect.STRICT_STRING_CONCAT, 211 coalesce=dialect.CONCAT_COALESCE, 212 ), 213 "CONVERT_TIMEZONE": build_convert_timezone, 214 "DATE_TO_DATE_STR": lambda args: exp.Cast( 215 this=seq_get(args, 0), 216 to=exp.DataType(this=exp.DataType.Type.TEXT), 217 ), 218 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 219 start=seq_get(args, 0), 220 end=seq_get(args, 1), 221 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")), 222 ), 223 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 224 "HEX": build_hex, 225 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 226 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 227 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 228 "LIKE": build_like, 229 "LOG": build_logarithm, 230 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 231 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 232 "LOWER": build_lower, 233 "LPAD": lambda args: build_pad(args), 234 "LEFTPAD": lambda args: build_pad(args), 235 "LTRIM": lambda args: build_trim(args), 236 "MOD": build_mod, 237 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 238 "RPAD": lambda args: build_pad(args, is_left=False), 239 "RTRIM": lambda args: build_trim(args, is_left=False), 240 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 241 if len(args) != 2 242 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 243 "STRPOS": exp.StrPosition.from_arg_list, 244 "CHARINDEX": lambda args: build_locate_strposition(args), 245 "INSTR": exp.StrPosition.from_arg_list, 246 "LOCATE": lambda args: build_locate_strposition(args), 247 "TIME_TO_TIME_STR": lambda args: exp.Cast( 248 this=seq_get(args, 0), 249 to=exp.DataType(this=exp.DataType.Type.TEXT), 250 ), 251 "TO_HEX": build_hex, 252 "TS_OR_DS_TO_DATE_STR": 
lambda args: exp.Substring( 253 this=exp.Cast( 254 this=seq_get(args, 0), 255 to=exp.DataType(this=exp.DataType.Type.TEXT), 256 ), 257 start=exp.Literal.number(1), 258 length=exp.Literal.number(10), 259 ), 260 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 261 "UPPER": build_upper, 262 "VAR_MAP": build_var_map, 263 } 264 265 NO_PAREN_FUNCTIONS = { 266 TokenType.CURRENT_DATE: exp.CurrentDate, 267 TokenType.CURRENT_DATETIME: exp.CurrentDate, 268 TokenType.CURRENT_TIME: exp.CurrentTime, 269 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 270 TokenType.CURRENT_USER: exp.CurrentUser, 271 } 272 273 STRUCT_TYPE_TOKENS = { 274 TokenType.NESTED, 275 TokenType.OBJECT, 276 TokenType.STRUCT, 277 TokenType.UNION, 278 } 279 280 NESTED_TYPE_TOKENS = { 281 TokenType.ARRAY, 282 TokenType.LIST, 283 TokenType.LOWCARDINALITY, 284 TokenType.MAP, 285 TokenType.NULLABLE, 286 TokenType.RANGE, 287 *STRUCT_TYPE_TOKENS, 288 } 289 290 ENUM_TYPE_TOKENS = { 291 TokenType.DYNAMIC, 292 TokenType.ENUM, 293 TokenType.ENUM8, 294 TokenType.ENUM16, 295 } 296 297 AGGREGATE_TYPE_TOKENS = { 298 TokenType.AGGREGATEFUNCTION, 299 TokenType.SIMPLEAGGREGATEFUNCTION, 300 } 301 302 TYPE_TOKENS = { 303 TokenType.BIT, 304 TokenType.BOOLEAN, 305 TokenType.TINYINT, 306 TokenType.UTINYINT, 307 TokenType.SMALLINT, 308 TokenType.USMALLINT, 309 TokenType.INT, 310 TokenType.UINT, 311 TokenType.BIGINT, 312 TokenType.UBIGINT, 313 TokenType.INT128, 314 TokenType.UINT128, 315 TokenType.INT256, 316 TokenType.UINT256, 317 TokenType.MEDIUMINT, 318 TokenType.UMEDIUMINT, 319 TokenType.FIXEDSTRING, 320 TokenType.FLOAT, 321 TokenType.DOUBLE, 322 TokenType.CHAR, 323 TokenType.NCHAR, 324 TokenType.VARCHAR, 325 TokenType.NVARCHAR, 326 TokenType.BPCHAR, 327 TokenType.TEXT, 328 TokenType.MEDIUMTEXT, 329 TokenType.LONGTEXT, 330 TokenType.MEDIUMBLOB, 331 TokenType.LONGBLOB, 332 TokenType.BINARY, 333 TokenType.VARBINARY, 334 TokenType.JSON, 335 TokenType.JSONB, 336 TokenType.INTERVAL, 337 TokenType.TINYBLOB, 338 TokenType.TINYTEXT, 339 TokenType.TIME, 340 TokenType.TIMETZ, 341 TokenType.TIMESTAMP, 342 TokenType.TIMESTAMP_S, 343 TokenType.TIMESTAMP_MS, 344 TokenType.TIMESTAMP_NS, 345 TokenType.TIMESTAMPTZ, 346 TokenType.TIMESTAMPLTZ, 347 TokenType.TIMESTAMPNTZ, 348 TokenType.DATETIME, 349 TokenType.DATETIME2, 350 TokenType.DATETIME64, 351 TokenType.SMALLDATETIME, 352 TokenType.DATE, 353 TokenType.DATE32, 354 TokenType.INT4RANGE, 355 TokenType.INT4MULTIRANGE, 356 TokenType.INT8RANGE, 357 TokenType.INT8MULTIRANGE, 358 TokenType.NUMRANGE, 359 TokenType.NUMMULTIRANGE, 360 TokenType.TSRANGE, 361 TokenType.TSMULTIRANGE, 362 TokenType.TSTZRANGE, 363 TokenType.TSTZMULTIRANGE, 364 TokenType.DATERANGE, 365 TokenType.DATEMULTIRANGE, 366 TokenType.DECIMAL, 367 TokenType.DECIMAL32, 368 TokenType.DECIMAL64, 369 TokenType.DECIMAL128, 370 TokenType.DECIMAL256, 371 TokenType.UDECIMAL, 372 TokenType.BIGDECIMAL, 373 TokenType.UUID, 374 TokenType.GEOGRAPHY, 375 TokenType.GEOMETRY, 376 TokenType.POINT, 377 TokenType.RING, 378 TokenType.LINESTRING, 379 TokenType.MULTILINESTRING, 380 TokenType.POLYGON, 381 TokenType.MULTIPOLYGON, 382 TokenType.HLLSKETCH, 383 TokenType.HSTORE, 384 TokenType.PSEUDO_TYPE, 385 TokenType.SUPER, 386 TokenType.SERIAL, 387 TokenType.SMALLSERIAL, 388 TokenType.BIGSERIAL, 389 TokenType.XML, 390 TokenType.YEAR, 391 TokenType.USERDEFINED, 392 TokenType.MONEY, 393 TokenType.SMALLMONEY, 394 TokenType.ROWVERSION, 395 TokenType.IMAGE, 396 TokenType.VARIANT, 397 TokenType.VECTOR, 398 TokenType.OBJECT, 399 TokenType.OBJECT_IDENTIFIER, 
400 TokenType.INET, 401 TokenType.IPADDRESS, 402 TokenType.IPPREFIX, 403 TokenType.IPV4, 404 TokenType.IPV6, 405 TokenType.UNKNOWN, 406 TokenType.NULL, 407 TokenType.NAME, 408 TokenType.TDIGEST, 409 TokenType.DYNAMIC, 410 *ENUM_TYPE_TOKENS, 411 *NESTED_TYPE_TOKENS, 412 *AGGREGATE_TYPE_TOKENS, 413 } 414 415 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 416 TokenType.BIGINT: TokenType.UBIGINT, 417 TokenType.INT: TokenType.UINT, 418 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 419 TokenType.SMALLINT: TokenType.USMALLINT, 420 TokenType.TINYINT: TokenType.UTINYINT, 421 TokenType.DECIMAL: TokenType.UDECIMAL, 422 } 423 424 SUBQUERY_PREDICATES = { 425 TokenType.ANY: exp.Any, 426 TokenType.ALL: exp.All, 427 TokenType.EXISTS: exp.Exists, 428 TokenType.SOME: exp.Any, 429 } 430 431 RESERVED_TOKENS = { 432 *Tokenizer.SINGLE_TOKENS.values(), 433 TokenType.SELECT, 434 } - {TokenType.IDENTIFIER} 435 436 DB_CREATABLES = { 437 TokenType.DATABASE, 438 TokenType.DICTIONARY, 439 TokenType.MODEL, 440 TokenType.NAMESPACE, 441 TokenType.SCHEMA, 442 TokenType.SEQUENCE, 443 TokenType.SINK, 444 TokenType.SOURCE, 445 TokenType.STORAGE_INTEGRATION, 446 TokenType.STREAMLIT, 447 TokenType.TABLE, 448 TokenType.TAG, 449 TokenType.VIEW, 450 TokenType.WAREHOUSE, 451 } 452 453 CREATABLES = { 454 TokenType.COLUMN, 455 TokenType.CONSTRAINT, 456 TokenType.FOREIGN_KEY, 457 TokenType.FUNCTION, 458 TokenType.INDEX, 459 TokenType.PROCEDURE, 460 *DB_CREATABLES, 461 } 462 463 ALTERABLES = { 464 TokenType.INDEX, 465 TokenType.TABLE, 466 TokenType.VIEW, 467 } 468 469 # Tokens that can represent identifiers 470 ID_VAR_TOKENS = { 471 TokenType.ALL, 472 TokenType.ATTACH, 473 TokenType.VAR, 474 TokenType.ANTI, 475 TokenType.APPLY, 476 TokenType.ASC, 477 TokenType.ASOF, 478 TokenType.AUTO_INCREMENT, 479 TokenType.BEGIN, 480 TokenType.BPCHAR, 481 TokenType.CACHE, 482 TokenType.CASE, 483 TokenType.COLLATE, 484 TokenType.COMMAND, 485 TokenType.COMMENT, 486 TokenType.COMMIT, 487 TokenType.CONSTRAINT, 488 TokenType.COPY, 489 TokenType.CUBE, 490 TokenType.CURRENT_SCHEMA, 491 TokenType.DEFAULT, 492 TokenType.DELETE, 493 TokenType.DESC, 494 TokenType.DESCRIBE, 495 TokenType.DETACH, 496 TokenType.DICTIONARY, 497 TokenType.DIV, 498 TokenType.END, 499 TokenType.EXECUTE, 500 TokenType.EXPORT, 501 TokenType.ESCAPE, 502 TokenType.FALSE, 503 TokenType.FIRST, 504 TokenType.FILTER, 505 TokenType.FINAL, 506 TokenType.FORMAT, 507 TokenType.FULL, 508 TokenType.IDENTIFIER, 509 TokenType.IS, 510 TokenType.ISNULL, 511 TokenType.INTERVAL, 512 TokenType.KEEP, 513 TokenType.KILL, 514 TokenType.LEFT, 515 TokenType.LIMIT, 516 TokenType.LOAD, 517 TokenType.MERGE, 518 TokenType.NATURAL, 519 TokenType.NEXT, 520 TokenType.OFFSET, 521 TokenType.OPERATOR, 522 TokenType.ORDINALITY, 523 TokenType.OVERLAPS, 524 TokenType.OVERWRITE, 525 TokenType.PARTITION, 526 TokenType.PERCENT, 527 TokenType.PIVOT, 528 TokenType.PRAGMA, 529 TokenType.RANGE, 530 TokenType.RECURSIVE, 531 TokenType.REFERENCES, 532 TokenType.REFRESH, 533 TokenType.RENAME, 534 TokenType.REPLACE, 535 TokenType.RIGHT, 536 TokenType.ROLLUP, 537 TokenType.ROW, 538 TokenType.ROWS, 539 TokenType.SEMI, 540 TokenType.SET, 541 TokenType.SETTINGS, 542 TokenType.SHOW, 543 TokenType.TEMPORARY, 544 TokenType.TOP, 545 TokenType.TRUE, 546 TokenType.TRUNCATE, 547 TokenType.UNIQUE, 548 TokenType.UNNEST, 549 TokenType.UNPIVOT, 550 TokenType.UPDATE, 551 TokenType.USE, 552 TokenType.VOLATILE, 553 TokenType.WINDOW, 554 *CREATABLES, 555 *SUBQUERY_PREDICATES, 556 *TYPE_TOKENS, 557 *NO_PAREN_FUNCTIONS, 558 } 559 ID_VAR_TOKENS.remove(TokenType.UNION) 
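    # Annotation (not part of the upstream source): ID_VAR_TOKENS is what lets
    # keyword-like tokens double as plain identifiers, so e.g.
    # sqlglot.parse_one("SELECT left FROM t") treats "left" as a column name
    # rather than a join side. UNION is removed above, presumably so it keeps
    # introducing set operations instead of being swallowed as an identifier.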
560 561 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 562 TokenType.ANTI, 563 TokenType.APPLY, 564 TokenType.ASOF, 565 TokenType.FULL, 566 TokenType.LEFT, 567 TokenType.LOCK, 568 TokenType.NATURAL, 569 TokenType.RIGHT, 570 TokenType.SEMI, 571 TokenType.WINDOW, 572 } 573 574 ALIAS_TOKENS = ID_VAR_TOKENS 575 576 ARRAY_CONSTRUCTORS = { 577 "ARRAY": exp.Array, 578 "LIST": exp.List, 579 } 580 581 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 582 583 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 584 585 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 586 587 FUNC_TOKENS = { 588 TokenType.COLLATE, 589 TokenType.COMMAND, 590 TokenType.CURRENT_DATE, 591 TokenType.CURRENT_DATETIME, 592 TokenType.CURRENT_SCHEMA, 593 TokenType.CURRENT_TIMESTAMP, 594 TokenType.CURRENT_TIME, 595 TokenType.CURRENT_USER, 596 TokenType.FILTER, 597 TokenType.FIRST, 598 TokenType.FORMAT, 599 TokenType.GLOB, 600 TokenType.IDENTIFIER, 601 TokenType.INDEX, 602 TokenType.ISNULL, 603 TokenType.ILIKE, 604 TokenType.INSERT, 605 TokenType.LIKE, 606 TokenType.MERGE, 607 TokenType.NEXT, 608 TokenType.OFFSET, 609 TokenType.PRIMARY_KEY, 610 TokenType.RANGE, 611 TokenType.REPLACE, 612 TokenType.RLIKE, 613 TokenType.ROW, 614 TokenType.UNNEST, 615 TokenType.VAR, 616 TokenType.LEFT, 617 TokenType.RIGHT, 618 TokenType.SEQUENCE, 619 TokenType.DATE, 620 TokenType.DATETIME, 621 TokenType.TABLE, 622 TokenType.TIMESTAMP, 623 TokenType.TIMESTAMPTZ, 624 TokenType.TRUNCATE, 625 TokenType.WINDOW, 626 TokenType.XOR, 627 *TYPE_TOKENS, 628 *SUBQUERY_PREDICATES, 629 } 630 631 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 632 TokenType.AND: exp.And, 633 } 634 635 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 636 TokenType.COLON_EQ: exp.PropertyEQ, 637 } 638 639 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 640 TokenType.OR: exp.Or, 641 } 642 643 EQUALITY = { 644 TokenType.EQ: exp.EQ, 645 TokenType.NEQ: exp.NEQ, 646 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 647 } 648 649 COMPARISON = { 650 TokenType.GT: exp.GT, 651 TokenType.GTE: exp.GTE, 652 TokenType.LT: exp.LT, 653 TokenType.LTE: exp.LTE, 654 } 655 656 BITWISE = { 657 TokenType.AMP: exp.BitwiseAnd, 658 TokenType.CARET: exp.BitwiseXor, 659 TokenType.PIPE: exp.BitwiseOr, 660 } 661 662 TERM = { 663 TokenType.DASH: exp.Sub, 664 TokenType.PLUS: exp.Add, 665 TokenType.MOD: exp.Mod, 666 TokenType.COLLATE: exp.Collate, 667 } 668 669 FACTOR = { 670 TokenType.DIV: exp.IntDiv, 671 TokenType.LR_ARROW: exp.Distance, 672 TokenType.SLASH: exp.Div, 673 TokenType.STAR: exp.Mul, 674 } 675 676 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 677 678 TIMES = { 679 TokenType.TIME, 680 TokenType.TIMETZ, 681 } 682 683 TIMESTAMPS = { 684 TokenType.TIMESTAMP, 685 TokenType.TIMESTAMPTZ, 686 TokenType.TIMESTAMPLTZ, 687 *TIMES, 688 } 689 690 SET_OPERATIONS = { 691 TokenType.UNION, 692 TokenType.INTERSECT, 693 TokenType.EXCEPT, 694 } 695 696 JOIN_METHODS = { 697 TokenType.ASOF, 698 TokenType.NATURAL, 699 TokenType.POSITIONAL, 700 } 701 702 JOIN_SIDES = { 703 TokenType.LEFT, 704 TokenType.RIGHT, 705 TokenType.FULL, 706 } 707 708 JOIN_KINDS = { 709 TokenType.ANTI, 710 TokenType.CROSS, 711 TokenType.INNER, 712 TokenType.OUTER, 713 TokenType.SEMI, 714 TokenType.STRAIGHT_JOIN, 715 } 716 717 JOIN_HINTS: t.Set[str] = set() 718 719 LAMBDAS = { 720 TokenType.ARROW: lambda self, expressions: self.expression( 721 exp.Lambda, 722 this=self._replace_lambda( 723 self._parse_assignment(), 724 expressions, 725 ), 726 expressions=expressions, 727 ), 728 TokenType.FARROW: lambda self, 
expressions: self.expression( 729 exp.Kwarg, 730 this=exp.var(expressions[0].name), 731 expression=self._parse_assignment(), 732 ), 733 } 734 735 COLUMN_OPERATORS = { 736 TokenType.DOT: None, 737 TokenType.DCOLON: lambda self, this, to: self.expression( 738 exp.Cast if self.STRICT_CAST else exp.TryCast, 739 this=this, 740 to=to, 741 ), 742 TokenType.ARROW: lambda self, this, path: self.expression( 743 exp.JSONExtract, 744 this=this, 745 expression=self.dialect.to_json_path(path), 746 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 747 ), 748 TokenType.DARROW: lambda self, this, path: self.expression( 749 exp.JSONExtractScalar, 750 this=this, 751 expression=self.dialect.to_json_path(path), 752 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 753 ), 754 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 755 exp.JSONBExtract, 756 this=this, 757 expression=path, 758 ), 759 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 760 exp.JSONBExtractScalar, 761 this=this, 762 expression=path, 763 ), 764 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 765 exp.JSONBContains, 766 this=this, 767 expression=key, 768 ), 769 } 770 771 EXPRESSION_PARSERS = { 772 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 773 exp.Column: lambda self: self._parse_column(), 774 exp.Condition: lambda self: self._parse_assignment(), 775 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 776 exp.Expression: lambda self: self._parse_expression(), 777 exp.From: lambda self: self._parse_from(joins=True), 778 exp.Group: lambda self: self._parse_group(), 779 exp.Having: lambda self: self._parse_having(), 780 exp.Hint: lambda self: self._parse_hint_body(), 781 exp.Identifier: lambda self: self._parse_id_var(), 782 exp.Join: lambda self: self._parse_join(), 783 exp.Lambda: lambda self: self._parse_lambda(), 784 exp.Lateral: lambda self: self._parse_lateral(), 785 exp.Limit: lambda self: self._parse_limit(), 786 exp.Offset: lambda self: self._parse_offset(), 787 exp.Order: lambda self: self._parse_order(), 788 exp.Ordered: lambda self: self._parse_ordered(), 789 exp.Properties: lambda self: self._parse_properties(), 790 exp.Qualify: lambda self: self._parse_qualify(), 791 exp.Returning: lambda self: self._parse_returning(), 792 exp.Select: lambda self: self._parse_select(), 793 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 794 exp.Table: lambda self: self._parse_table_parts(), 795 exp.TableAlias: lambda self: self._parse_table_alias(), 796 exp.Tuple: lambda self: self._parse_value(), 797 exp.Whens: lambda self: self._parse_when_matched(), 798 exp.Where: lambda self: self._parse_where(), 799 exp.Window: lambda self: self._parse_named_window(), 800 exp.With: lambda self: self._parse_with(), 801 "JOIN_TYPE": lambda self: self._parse_join_parts(), 802 } 803 804 STATEMENT_PARSERS = { 805 TokenType.ALTER: lambda self: self._parse_alter(), 806 TokenType.ANALYZE: lambda self: self._parse_analyze(), 807 TokenType.BEGIN: lambda self: self._parse_transaction(), 808 TokenType.CACHE: lambda self: self._parse_cache(), 809 TokenType.COMMENT: lambda self: self._parse_comment(), 810 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 811 TokenType.COPY: lambda self: self._parse_copy(), 812 TokenType.CREATE: lambda self: self._parse_create(), 813 TokenType.DELETE: lambda self: self._parse_delete(), 814 TokenType.DESC: lambda self: self._parse_describe(), 815 TokenType.DESCRIBE: lambda self: 
self._parse_describe(), 816 TokenType.DROP: lambda self: self._parse_drop(), 817 TokenType.GRANT: lambda self: self._parse_grant(), 818 TokenType.INSERT: lambda self: self._parse_insert(), 819 TokenType.KILL: lambda self: self._parse_kill(), 820 TokenType.LOAD: lambda self: self._parse_load(), 821 TokenType.MERGE: lambda self: self._parse_merge(), 822 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 823 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 824 TokenType.REFRESH: lambda self: self._parse_refresh(), 825 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 826 TokenType.SET: lambda self: self._parse_set(), 827 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 828 TokenType.UNCACHE: lambda self: self._parse_uncache(), 829 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 830 TokenType.UPDATE: lambda self: self._parse_update(), 831 TokenType.USE: lambda self: self.expression( 832 exp.Use, 833 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 834 this=self._parse_table(schema=False), 835 ), 836 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 837 } 838 839 UNARY_PARSERS = { 840 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 841 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 842 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 843 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 844 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 845 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 846 } 847 848 STRING_PARSERS = { 849 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 850 exp.RawString, this=token.text 851 ), 852 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 853 exp.National, this=token.text 854 ), 855 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 856 TokenType.STRING: lambda self, token: self.expression( 857 exp.Literal, this=token.text, is_string=True 858 ), 859 TokenType.UNICODE_STRING: lambda self, token: self.expression( 860 exp.UnicodeString, 861 this=token.text, 862 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 863 ), 864 } 865 866 NUMERIC_PARSERS = { 867 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 868 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 869 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 870 TokenType.NUMBER: lambda self, token: self.expression( 871 exp.Literal, this=token.text, is_string=False 872 ), 873 } 874 875 PRIMARY_PARSERS = { 876 **STRING_PARSERS, 877 **NUMERIC_PARSERS, 878 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 879 TokenType.NULL: lambda self, _: self.expression(exp.Null), 880 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 881 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 882 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 883 TokenType.STAR: lambda self, _: self._parse_star_ops(), 884 } 885 886 PLACEHOLDER_PARSERS = { 887 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 888 TokenType.PARAMETER: lambda self: self._parse_parameter(), 889 
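# Illustrative aside, not upstream code: the COLON entry that follows
# implements named placeholders. Assuming the public helper sqlglot.parse_one,
# parse_one("SELECT :x") would yield exp.Placeholder(this="x"), while a bare
# "?" is covered by the PLACEHOLDER entry above and an "@param"-style token
# by the PARAMETER entry.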
TokenType.COLON: lambda self: ( 890 self.expression(exp.Placeholder, this=self._prev.text) 891 if self._match_set(self.ID_VAR_TOKENS) 892 else None 893 ), 894 } 895 896 RANGE_PARSERS = { 897 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 898 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 899 TokenType.GLOB: binary_range_parser(exp.Glob), 900 TokenType.ILIKE: binary_range_parser(exp.ILike), 901 TokenType.IN: lambda self, this: self._parse_in(this), 902 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 903 TokenType.IS: lambda self, this: self._parse_is(this), 904 TokenType.LIKE: binary_range_parser(exp.Like), 905 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 906 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 907 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 908 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 909 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 910 } 911 912 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 913 "ALLOWED_VALUES": lambda self: self.expression( 914 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 915 ), 916 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 917 "AUTO": lambda self: self._parse_auto_property(), 918 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 919 "BACKUP": lambda self: self.expression( 920 exp.BackupProperty, this=self._parse_var(any_token=True) 921 ), 922 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 923 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 924 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 925 "CHECKSUM": lambda self: self._parse_checksum(), 926 "CLUSTER BY": lambda self: self._parse_cluster(), 927 "CLUSTERED": lambda self: self._parse_clustered_by(), 928 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 929 exp.CollateProperty, **kwargs 930 ), 931 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 932 "CONTAINS": lambda self: self._parse_contains_property(), 933 "COPY": lambda self: self._parse_copy_property(), 934 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 935 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 936 "DEFINER": lambda self: self._parse_definer(), 937 "DETERMINISTIC": lambda self: self.expression( 938 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 939 ), 940 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 941 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 942 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 943 "DISTKEY": lambda self: self._parse_distkey(), 944 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 945 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 946 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 947 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 948 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 949 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 950 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 951 "FREESPACE": lambda self: self._parse_freespace(), 952 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 953 "HEAP": lambda self: self.expression(exp.HeapProperty), 
954 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 955 "IMMUTABLE": lambda self: self.expression( 956 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 957 ), 958 "INHERITS": lambda self: self.expression( 959 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 960 ), 961 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 962 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 963 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 964 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 965 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 966 "LIKE": lambda self: self._parse_create_like(), 967 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 968 "LOCK": lambda self: self._parse_locking(), 969 "LOCKING": lambda self: self._parse_locking(), 970 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 971 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 972 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 973 "MODIFIES": lambda self: self._parse_modifies_property(), 974 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 975 "NO": lambda self: self._parse_no_property(), 976 "ON": lambda self: self._parse_on_property(), 977 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 978 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 979 "PARTITION": lambda self: self._parse_partitioned_of(), 980 "PARTITION BY": lambda self: self._parse_partitioned_by(), 981 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 982 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 983 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 984 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 985 "READS": lambda self: self._parse_reads_property(), 986 "REMOTE": lambda self: self._parse_remote_with_connection(), 987 "RETURNS": lambda self: self._parse_returns(), 988 "STRICT": lambda self: self.expression(exp.StrictProperty), 989 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 990 "ROW": lambda self: self._parse_row(), 991 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 992 "SAMPLE": lambda self: self.expression( 993 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 994 ), 995 "SECURE": lambda self: self.expression(exp.SecureProperty), 996 "SECURITY": lambda self: self._parse_security(), 997 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 998 "SETTINGS": lambda self: self._parse_settings_property(), 999 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1000 "SORTKEY": lambda self: self._parse_sortkey(), 1001 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1002 "STABLE": lambda self: self.expression( 1003 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1004 ), 1005 "STORED": lambda self: self._parse_stored(), 1006 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1007 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1008 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1009 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1010 "TO": lambda self: self._parse_to_table(), 1011 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 
1012 "TRANSFORM": lambda self: self.expression( 1013 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1014 ), 1015 "TTL": lambda self: self._parse_ttl(), 1016 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1017 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1018 "VOLATILE": lambda self: self._parse_volatile_property(), 1019 "WITH": lambda self: self._parse_with_property(), 1020 } 1021 1022 CONSTRAINT_PARSERS = { 1023 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1024 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1025 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1026 "CHARACTER SET": lambda self: self.expression( 1027 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1028 ), 1029 "CHECK": lambda self: self.expression( 1030 exp.CheckColumnConstraint, 1031 this=self._parse_wrapped(self._parse_assignment), 1032 enforced=self._match_text_seq("ENFORCED"), 1033 ), 1034 "COLLATE": lambda self: self.expression( 1035 exp.CollateColumnConstraint, 1036 this=self._parse_identifier() or self._parse_column(), 1037 ), 1038 "COMMENT": lambda self: self.expression( 1039 exp.CommentColumnConstraint, this=self._parse_string() 1040 ), 1041 "COMPRESS": lambda self: self._parse_compress(), 1042 "CLUSTERED": lambda self: self.expression( 1043 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1044 ), 1045 "NONCLUSTERED": lambda self: self.expression( 1046 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1047 ), 1048 "DEFAULT": lambda self: self.expression( 1049 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1050 ), 1051 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1052 "EPHEMERAL": lambda self: self.expression( 1053 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1054 ), 1055 "EXCLUDE": lambda self: self.expression( 1056 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1057 ), 1058 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1059 "FORMAT": lambda self: self.expression( 1060 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1061 ), 1062 "GENERATED": lambda self: self._parse_generated_as_identity(), 1063 "IDENTITY": lambda self: self._parse_auto_increment(), 1064 "INLINE": lambda self: self._parse_inline(), 1065 "LIKE": lambda self: self._parse_create_like(), 1066 "NOT": lambda self: self._parse_not_constraint(), 1067 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1068 "ON": lambda self: ( 1069 self._match(TokenType.UPDATE) 1070 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1071 ) 1072 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1073 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1074 "PERIOD": lambda self: self._parse_period_for_system_time(), 1075 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1076 "REFERENCES": lambda self: self._parse_references(match=False), 1077 "TITLE": lambda self: self.expression( 1078 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1079 ), 1080 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1081 "UNIQUE": lambda self: self._parse_unique(), 1082 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1083 "WATERMARK": lambda self: 
self.expression( 1084 exp.WatermarkColumnConstraint, 1085 this=self._match(TokenType.FOR) and self._parse_column(), 1086 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1087 ), 1088 "WITH": lambda self: self.expression( 1089 exp.Properties, expressions=self._parse_wrapped_properties() 1090 ), 1091 } 1092 1093 ALTER_PARSERS = { 1094 "ADD": lambda self: self._parse_alter_table_add(), 1095 "AS": lambda self: self._parse_select(), 1096 "ALTER": lambda self: self._parse_alter_table_alter(), 1097 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1098 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1099 "DROP": lambda self: self._parse_alter_table_drop(), 1100 "RENAME": lambda self: self._parse_alter_table_rename(), 1101 "SET": lambda self: self._parse_alter_table_set(), 1102 "SWAP": lambda self: self.expression( 1103 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1104 ), 1105 } 1106 1107 ALTER_ALTER_PARSERS = { 1108 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1109 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1110 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1111 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1112 } 1113 1114 SCHEMA_UNNAMED_CONSTRAINTS = { 1115 "CHECK", 1116 "EXCLUDE", 1117 "FOREIGN KEY", 1118 "LIKE", 1119 "PERIOD", 1120 "PRIMARY KEY", 1121 "UNIQUE", 1122 "WATERMARK", 1123 } 1124 1125 NO_PAREN_FUNCTION_PARSERS = { 1126 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1127 "CASE": lambda self: self._parse_case(), 1128 "CONNECT_BY_ROOT": lambda self: self.expression( 1129 exp.ConnectByRoot, this=self._parse_column() 1130 ), 1131 "IF": lambda self: self._parse_if(), 1132 } 1133 1134 INVALID_FUNC_NAME_TOKENS = { 1135 TokenType.IDENTIFIER, 1136 TokenType.STRING, 1137 } 1138 1139 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1140 1141 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1142 1143 FUNCTION_PARSERS = { 1144 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1145 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1146 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1147 "DECODE": lambda self: self._parse_decode(), 1148 "EXTRACT": lambda self: self._parse_extract(), 1149 "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1150 "GAP_FILL": lambda self: self._parse_gap_fill(), 1151 "JSON_OBJECT": lambda self: self._parse_json_object(), 1152 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1153 "JSON_TABLE": lambda self: self._parse_json_table(), 1154 "MATCH": lambda self: self._parse_match_against(), 1155 "NORMALIZE": lambda self: self._parse_normalize(), 1156 "OPENJSON": lambda self: self._parse_open_json(), 1157 "OVERLAY": lambda self: self._parse_overlay(), 1158 "POSITION": lambda self: self._parse_position(), 1159 "PREDICT": lambda self: self._parse_predict(), 1160 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1161 "STRING_AGG": lambda self: self._parse_string_agg(), 1162 "SUBSTRING": lambda self: self._parse_substring(), 1163 "TRIM": lambda self: self._parse_trim(), 1164 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1165 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1166 "XMLELEMENT": lambda self: self.expression( 1167 exp.XMLElement, 1168 this=self._match_text_seq("NAME") and self._parse_id_var(), 1169 expressions=self._match(TokenType.COMMA) and 
self._parse_csv(self._parse_expression), 1170 ), 1171 "XMLTABLE": lambda self: self._parse_xml_table(), 1172 } 1173 1174 QUERY_MODIFIER_PARSERS = { 1175 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1176 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1177 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1178 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1179 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1180 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1181 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1182 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1183 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1184 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1185 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1186 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1187 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1188 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1189 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1190 TokenType.CLUSTER_BY: lambda self: ( 1191 "cluster", 1192 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1193 ), 1194 TokenType.DISTRIBUTE_BY: lambda self: ( 1195 "distribute", 1196 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1197 ), 1198 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1199 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1200 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1201 } 1202 1203 SET_PARSERS = { 1204 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1205 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1206 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1207 "TRANSACTION": lambda self: self._parse_set_transaction(), 1208 } 1209 1210 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1211 1212 TYPE_LITERAL_PARSERS = { 1213 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1214 } 1215 1216 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1217 1218 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1219 1220 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1221 1222 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1223 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1224 "ISOLATION": ( 1225 ("LEVEL", "REPEATABLE", "READ"), 1226 ("LEVEL", "READ", "COMMITTED"), 1227 ("LEVEL", "READ", "UNCOMMITTED"), 1228 ("LEVEL", "SERIALIZABLE"), 1229 ), 1230 "READ": ("WRITE", "ONLY"), 1231 } 1232 1233 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1234 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1235 ) 1236 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1237 1238 CREATE_SEQUENCE: OPTIONS_TYPE = { 1239 "SCALE": ("EXTEND", "NOEXTEND"), 1240 "SHARD": ("EXTEND", "NOEXTEND"), 1241 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1242 **dict.fromkeys( 1243 ( 1244 "SESSION", 1245 "GLOBAL", 1246 "KEEP", 1247 "NOKEEP", 1248 "ORDER", 1249 "NOORDER", 1250 "NOCACHE", 1251 "CYCLE", 1252 "NOCYCLE", 1253 "NOMINVALUE", 1254 "NOMAXVALUE", 1255 "NOSCALE", 1256 "NOSHARD", 1257 ), 1258 tuple(), 1259 ), 1260 } 1261 1262
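# Hedged sketch, not upstream code: these OPTIONS_TYPE tables map a leading
# keyword to the keyword sequences that may follow it, and they are consumed
# by _parse_var_from_options (defined later in this class), which returns the
# matched phrase as an exp.Var. For example, matching CREATE_SEQUENCE against
# the input "NO CYCLE" would hit the "NO" key, accept the "CYCLE"
# continuation, and produce roughly exp.var("NO CYCLE"); with
# raise_unmatched=False an unknown keyword simply returns None instead of
# raising.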
ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1263 1264 USABLES: OPTIONS_TYPE = dict.fromkeys( 1265 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1266 ) 1267 1268 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1269 1270 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1271 "TYPE": ("EVOLUTION",), 1272 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1273 } 1274 1275 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1276 1277 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1278 1279 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1280 "NOT": ("ENFORCED",), 1281 "MATCH": ( 1282 "FULL", 1283 "PARTIAL", 1284 "SIMPLE", 1285 ), 1286 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1287 **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()), 1288 } 1289 1290 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1291 1292 CLONE_KEYWORDS = {"CLONE", "COPY"} 1293 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1294 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1295 1296 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1297 1298 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1299 1300 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1301 1302 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1303 1304 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1305 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1306 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1307 1308 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1309 1310 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1311 1312 ADD_CONSTRAINT_TOKENS = { 1313 TokenType.CONSTRAINT, 1314 TokenType.FOREIGN_KEY, 1315 TokenType.INDEX, 1316 TokenType.KEY, 1317 TokenType.PRIMARY_KEY, 1318 TokenType.UNIQUE, 1319 } 1320 1321 DISTINCT_TOKENS = {TokenType.DISTINCT} 1322 1323 NULL_TOKENS = {TokenType.NULL} 1324 1325 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1326 1327 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1328 1329 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1330 1331 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1332 1333 ODBC_DATETIME_LITERALS = { 1334 "d": exp.Date, 1335 "t": exp.Time, 1336 "ts": exp.Timestamp, 1337 } 1338 1339 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1340 1341 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1342 1343 # The style options for the DESCRIBE statement 1344 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1345 1346 # The style options for the ANALYZE statement 1347 ANALYZE_STYLES = { 1348 "BUFFER_USAGE_LIMIT", 1349 "FULL", 1350 "LOCAL", 1351 "NO_WRITE_TO_BINLOG", 1352 "SAMPLE", 1353 "SKIP_LOCKED", 1354 "VERBOSE", 1355 } 1356 1357 ANALYZE_EXPRESSION_PARSERS = { 1358 "ALL": lambda self: self._parse_analyze_columns(), 1359 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1360 "DELETE": lambda self: self._parse_analyze_delete(), 1361 "DROP": lambda self: self._parse_analyze_histogram(), 1362 "ESTIMATE": lambda self: self._parse_analyze_statistics(), 1363 "LIST": lambda self: self._parse_analyze_list(), 1364 "PREDICATE": lambda self: self._parse_analyze_columns(), 1365 "UPDATE": lambda self: self._parse_analyze_histogram(), 1366 "VALIDATE": lambda self: 
self._parse_analyze_validate(), 1367 } 1368 1369 PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"} 1370 1371 AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET) 1372 1373 OPERATION_MODIFIERS: t.Set[str] = set() 1374 1375 STRICT_CAST = True 1376 1377 PREFIXED_PIVOT_COLUMNS = False 1378 IDENTIFY_PIVOT_STRINGS = False 1379 1380 LOG_DEFAULTS_TO_LN = False 1381 1382 # Whether ADD is present for each column added by ALTER TABLE 1383 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1384 1385 # Whether the table sample clause expects CSV syntax 1386 TABLESAMPLE_CSV = False 1387 1388 # The default method used for table sampling 1389 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1390 1391 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1392 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1393 1394 # Whether the TRIM function expects the characters to trim as its first argument 1395 TRIM_PATTERN_FIRST = False 1396 1397 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1398 STRING_ALIASES = False 1399 1400 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1401 MODIFIERS_ATTACHED_TO_SET_OP = True 1402 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1403 1404 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1405 NO_PAREN_IF_COMMANDS = True 1406 1407 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1408 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1409 1410 # Whether the `:` operator is used to extract a value from a VARIANT column 1411 COLON_IS_VARIANT_EXTRACT = False 1412 1413 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1414 # If this is True and '(' is not found, the keyword will be treated as an identifier 1415 VALUES_FOLLOWED_BY_PAREN = True 1416 1417 # Whether implicit unnesting is supported, e.g. 
SELECT 1 FROM y.z AS z, z.a (Redshift) 1418 SUPPORTS_IMPLICIT_UNNEST = False 1419 1420 # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTH 1421 INTERVAL_SPANS = True 1422 1423 # Whether a PARTITION clause can follow a table reference 1424 SUPPORTS_PARTITION_SELECTION = False 1425 1426 # Whether the `name AS expr` schema/column constraint requires parentheses around `expr` 1427 WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True 1428 1429 # Whether the 'AS' keyword is optional in the CTE definition syntax 1430 OPTIONAL_ALIAS_TOKEN_CTE = True 1431 1432 __slots__ = ( 1433 "error_level", 1434 "error_message_context", 1435 "max_errors", 1436 "dialect", 1437 "sql", 1438 "errors", 1439 "_tokens", 1440 "_index", 1441 "_curr", 1442 "_next", 1443 "_prev", 1444 "_prev_comments", 1445 ) 1446 1447 # Autofilled 1448 SHOW_TRIE: t.Dict = {} 1449 SET_TRIE: t.Dict = {} 1450 1451 def __init__( 1452 self, 1453 error_level: t.Optional[ErrorLevel] = None, 1454 error_message_context: int = 100, 1455 max_errors: int = 3, 1456 dialect: DialectType = None, 1457 ): 1458 from sqlglot.dialects import Dialect 1459 1460 self.error_level = error_level or ErrorLevel.IMMEDIATE 1461 self.error_message_context = error_message_context 1462 self.max_errors = max_errors 1463 self.dialect = Dialect.get_or_raise(dialect) 1464 self.reset() 1465 1466 def reset(self): 1467 self.sql = "" 1468 self.errors = [] 1469 self._tokens = [] 1470 self._index = 0 1471 self._curr = None 1472 self._next = None 1473 self._prev = None 1474 self._prev_comments = None 1475 1476 def parse( 1477 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1478 ) -> t.List[t.Optional[exp.Expression]]: 1479 """ 1480 Parses a list of tokens and returns a list of syntax trees, one tree 1481 per parsed SQL statement. 1482 1483 Args: 1484 raw_tokens: The list of tokens. 1485 sql: The original SQL string, used to produce helpful debug messages. 1486 1487 Returns: 1488 The list of the produced syntax trees. 1489 """ 1490 return self._parse( 1491 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1492 ) 1493 1494 def parse_into( 1495 self, 1496 expression_types: exp.IntoType, 1497 raw_tokens: t.List[Token], 1498 sql: t.Optional[str] = None, 1499 ) -> t.List[t.Optional[exp.Expression]]: 1500 """ 1501 Parses a list of tokens into a given Expression type. If a collection of Expression 1502 types is given instead, this method will try to parse the token list into each one 1503 of them, stopping at the first for which the parsing succeeds. 1504 1505 Args: 1506 expression_types: The expression type(s) to try and parse the token list into. 1507 raw_tokens: The list of tokens. 1508 sql: The original SQL string, used to produce helpful debug messages. 1509 1510 Returns: 1511 The list of parsed expressions.
1512 """ 1513 errors = [] 1514 for expression_type in ensure_list(expression_types): 1515 parser = self.EXPRESSION_PARSERS.get(expression_type) 1516 if not parser: 1517 raise TypeError(f"No parser registered for {expression_type}") 1518 1519 try: 1520 return self._parse(parser, raw_tokens, sql) 1521 except ParseError as e: 1522 e.errors[0]["into_expression"] = expression_type 1523 errors.append(e) 1524 1525 raise ParseError( 1526 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1527 errors=merge_errors(errors), 1528 ) from errors[-1] 1529 1530 def _parse( 1531 self, 1532 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1533 raw_tokens: t.List[Token], 1534 sql: t.Optional[str] = None, 1535 ) -> t.List[t.Optional[exp.Expression]]: 1536 self.reset() 1537 self.sql = sql or "" 1538 1539 total = len(raw_tokens) 1540 chunks: t.List[t.List[Token]] = [[]] 1541 1542 for i, token in enumerate(raw_tokens): 1543 if token.token_type == TokenType.SEMICOLON: 1544 if token.comments: 1545 chunks.append([token]) 1546 1547 if i < total - 1: 1548 chunks.append([]) 1549 else: 1550 chunks[-1].append(token) 1551 1552 expressions = [] 1553 1554 for tokens in chunks: 1555 self._index = -1 1556 self._tokens = tokens 1557 self._advance() 1558 1559 expressions.append(parse_method(self)) 1560 1561 if self._index < len(self._tokens): 1562 self.raise_error("Invalid expression / Unexpected token") 1563 1564 self.check_errors() 1565 1566 return expressions 1567 1568 def check_errors(self) -> None: 1569 """Logs or raises any found errors, depending on the chosen error level setting.""" 1570 if self.error_level == ErrorLevel.WARN: 1571 for error in self.errors: 1572 logger.error(str(error)) 1573 elif self.error_level == ErrorLevel.RAISE and self.errors: 1574 raise ParseError( 1575 concat_messages(self.errors, self.max_errors), 1576 errors=merge_errors(self.errors), 1577 ) 1578 1579 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1580 """ 1581 Appends an error in the list of recorded errors or raises it, depending on the chosen 1582 error level setting. 1583 """ 1584 token = token or self._curr or self._prev or Token.string("") 1585 start = token.start 1586 end = token.end + 1 1587 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1588 highlight = self.sql[start:end] 1589 end_context = self.sql[end : end + self.error_message_context] 1590 1591 error = ParseError.new( 1592 f"{message}. Line {token.line}, Col: {token.col}.\n" 1593 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1594 description=message, 1595 line=token.line, 1596 col=token.col, 1597 start_context=start_context, 1598 highlight=highlight, 1599 end_context=end_context, 1600 ) 1601 1602 if self.error_level == ErrorLevel.IMMEDIATE: 1603 raise error 1604 1605 self.errors.append(error) 1606 1607 def expression( 1608 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1609 ) -> E: 1610 """ 1611 Creates a new, validated Expression. 1612 1613 Args: 1614 exp_class: The expression class to instantiate. 1615 comments: An optional list of comments to attach to the expression. 1616 kwargs: The arguments to set for the expression along with their respective values. 1617 1618 Returns: 1619 The target expression. 
1620 """ 1621 instance = exp_class(**kwargs) 1622 instance.add_comments(comments) if comments else self._add_comments(instance) 1623 return self.validate_expression(instance) 1624 1625 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1626 if expression and self._prev_comments: 1627 expression.add_comments(self._prev_comments) 1628 self._prev_comments = None 1629 1630 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1631 """ 1632 Validates an Expression, making sure that all its mandatory arguments are set. 1633 1634 Args: 1635 expression: The expression to validate. 1636 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1637 1638 Returns: 1639 The validated expression. 1640 """ 1641 if self.error_level != ErrorLevel.IGNORE: 1642 for error_message in expression.error_messages(args): 1643 self.raise_error(error_message) 1644 1645 return expression 1646 1647 def _find_sql(self, start: Token, end: Token) -> str: 1648 return self.sql[start.start : end.end + 1] 1649 1650 def _is_connected(self) -> bool: 1651 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1652 1653 def _advance(self, times: int = 1) -> None: 1654 self._index += times 1655 self._curr = seq_get(self._tokens, self._index) 1656 self._next = seq_get(self._tokens, self._index + 1) 1657 1658 if self._index > 0: 1659 self._prev = self._tokens[self._index - 1] 1660 self._prev_comments = self._prev.comments 1661 else: 1662 self._prev = None 1663 self._prev_comments = None 1664 1665 def _retreat(self, index: int) -> None: 1666 if index != self._index: 1667 self._advance(index - self._index) 1668 1669 def _warn_unsupported(self) -> None: 1670 if len(self._tokens) <= 1: 1671 return 1672 1673 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1674 # interested in emitting a warning for the one being currently processed. 1675 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1676 1677 logger.warning( 1678 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1679 ) 1680 1681 def _parse_command(self) -> exp.Command: 1682 self._warn_unsupported() 1683 return self.expression( 1684 exp.Command, 1685 comments=self._prev_comments, 1686 this=self._prev.text.upper(), 1687 expression=self._parse_string(), 1688 ) 1689 1690 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1691 """ 1692 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
1693 This behavior can be different depending on the uset-set ErrorLevel, so _try_parse aims to 1694 solve this by setting & resetting the parser state accordingly 1695 """ 1696 index = self._index 1697 error_level = self.error_level 1698 1699 self.error_level = ErrorLevel.IMMEDIATE 1700 try: 1701 this = parse_method() 1702 except ParseError: 1703 this = None 1704 finally: 1705 if not this or retreat: 1706 self._retreat(index) 1707 self.error_level = error_level 1708 1709 return this 1710 1711 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1712 start = self._prev 1713 exists = self._parse_exists() if allow_exists else None 1714 1715 self._match(TokenType.ON) 1716 1717 materialized = self._match_text_seq("MATERIALIZED") 1718 kind = self._match_set(self.CREATABLES) and self._prev 1719 if not kind: 1720 return self._parse_as_command(start) 1721 1722 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1723 this = self._parse_user_defined_function(kind=kind.token_type) 1724 elif kind.token_type == TokenType.TABLE: 1725 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1726 elif kind.token_type == TokenType.COLUMN: 1727 this = self._parse_column() 1728 else: 1729 this = self._parse_id_var() 1730 1731 self._match(TokenType.IS) 1732 1733 return self.expression( 1734 exp.Comment, 1735 this=this, 1736 kind=kind.text, 1737 expression=self._parse_string(), 1738 exists=exists, 1739 materialized=materialized, 1740 ) 1741 1742 def _parse_to_table( 1743 self, 1744 ) -> exp.ToTableProperty: 1745 table = self._parse_table_parts(schema=True) 1746 return self.expression(exp.ToTableProperty, this=table) 1747 1748 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1749 def _parse_ttl(self) -> exp.Expression: 1750 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1751 this = self._parse_bitwise() 1752 1753 if self._match_text_seq("DELETE"): 1754 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1755 if self._match_text_seq("RECOMPRESS"): 1756 return self.expression( 1757 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1758 ) 1759 if self._match_text_seq("TO", "DISK"): 1760 return self.expression( 1761 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1762 ) 1763 if self._match_text_seq("TO", "VOLUME"): 1764 return self.expression( 1765 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1766 ) 1767 1768 return this 1769 1770 expressions = self._parse_csv(_parse_ttl_action) 1771 where = self._parse_where() 1772 group = self._parse_group() 1773 1774 aggregates = None 1775 if group and self._match(TokenType.SET): 1776 aggregates = self._parse_csv(self._parse_set_item) 1777 1778 return self.expression( 1779 exp.MergeTreeTTL, 1780 expressions=expressions, 1781 where=where, 1782 group=group, 1783 aggregates=aggregates, 1784 ) 1785 1786 def _parse_statement(self) -> t.Optional[exp.Expression]: 1787 if self._curr is None: 1788 return None 1789 1790 if self._match_set(self.STATEMENT_PARSERS): 1791 comments = self._prev_comments 1792 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 1793 stmt.add_comments(comments, prepend=True) 1794 return stmt 1795 1796 if self._match_set(self.dialect.tokenizer.COMMANDS): 1797 return self._parse_command() 1798 1799 expression = self._parse_expression() 1800 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1801 return self._parse_query_modifiers(expression) 
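# Usage sketch (illustrative, not part of the upstream module): end-to-end
# statement dispatch. _parse_statement first tries STATEMENT_PARSERS on the
# current token, then falls back to the dialect tokenizer's COMMANDS, and
# finally to a bare expression / SELECT.
#
#   sql = "UPDATE t SET x = 1"
#   tokens = Tokenizer().tokenize(sql)     # Tokenizer is imported at module top
#   (stmt,) = Parser().parse(tokens, sql)  # dispatched via TokenType.UPDATE
#   assert isinstance(stmt, exp.Update)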
1802 1803 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1804 start = self._prev 1805 temporary = self._match(TokenType.TEMPORARY) 1806 materialized = self._match_text_seq("MATERIALIZED") 1807 1808 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1809 if not kind: 1810 return self._parse_as_command(start) 1811 1812 concurrently = self._match_text_seq("CONCURRENTLY") 1813 if_exists = exists or self._parse_exists() 1814 1815 if kind == "COLUMN": 1816 this = self._parse_column() 1817 else: 1818 this = self._parse_table_parts( 1819 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1820 ) 1821 1822 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1823 1824 if self._match(TokenType.L_PAREN, advance=False): 1825 expressions = self._parse_wrapped_csv(self._parse_types) 1826 else: 1827 expressions = None 1828 1829 return self.expression( 1830 exp.Drop, 1831 exists=if_exists, 1832 this=this, 1833 expressions=expressions, 1834 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1835 temporary=temporary, 1836 materialized=materialized, 1837 cascade=self._match_text_seq("CASCADE"), 1838 constraints=self._match_text_seq("CONSTRAINTS"), 1839 purge=self._match_text_seq("PURGE"), 1840 cluster=cluster, 1841 concurrently=concurrently, 1842 ) 1843 1844 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1845 return ( 1846 self._match_text_seq("IF") 1847 and (not not_ or self._match(TokenType.NOT)) 1848 and self._match(TokenType.EXISTS) 1849 ) 1850 1851 def _parse_create(self) -> exp.Create | exp.Command: 1852 # Note: this can't be None because we've matched a statement parser 1853 start = self._prev 1854 1855 replace = ( 1856 start.token_type == TokenType.REPLACE 1857 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1858 or self._match_pair(TokenType.OR, TokenType.ALTER) 1859 ) 1860 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1861 1862 unique = self._match(TokenType.UNIQUE) 1863 1864 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1865 clustered = True 1866 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1867 "COLUMNSTORE" 1868 ): 1869 clustered = False 1870 else: 1871 clustered = None 1872 1873 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1874 self._advance() 1875 1876 properties = None 1877 create_token = self._match_set(self.CREATABLES) and self._prev 1878 1879 if not create_token: 1880 # exp.Properties.Location.POST_CREATE 1881 properties = self._parse_properties() 1882 create_token = self._match_set(self.CREATABLES) and self._prev 1883 1884 if not properties or not create_token: 1885 return self._parse_as_command(start) 1886 1887 concurrently = self._match_text_seq("CONCURRENTLY") 1888 exists = self._parse_exists(not_=True) 1889 this = None 1890 expression: t.Optional[exp.Expression] = None 1891 indexes = None 1892 no_schema_binding = None 1893 begin = None 1894 end = None 1895 clone = None 1896 1897 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1898 nonlocal properties 1899 if properties and temp_props: 1900 properties.expressions.extend(temp_props.expressions) 1901 elif temp_props: 1902 properties = temp_props 1903 1904 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1905 this = self._parse_user_defined_function(kind=create_token.token_type) 1906 1907 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1908 
extend_props(self._parse_properties()) 1909 1910 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1911 extend_props(self._parse_properties()) 1912 1913 if not expression: 1914 if self._match(TokenType.COMMAND): 1915 expression = self._parse_as_command(self._prev) 1916 else: 1917 begin = self._match(TokenType.BEGIN) 1918 return_ = self._match_text_seq("RETURN") 1919 1920 if self._match(TokenType.STRING, advance=False): 1921 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1922 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1923 expression = self._parse_string() 1924 extend_props(self._parse_properties()) 1925 else: 1926 expression = self._parse_user_defined_function_expression() 1927 1928 end = self._match_text_seq("END") 1929 1930 if return_: 1931 expression = self.expression(exp.Return, this=expression) 1932 elif create_token.token_type == TokenType.INDEX: 1933 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c) 1934 if not self._match(TokenType.ON): 1935 index = self._parse_id_var() 1936 anonymous = False 1937 else: 1938 index = None 1939 anonymous = True 1940 1941 this = self._parse_index(index=index, anonymous=anonymous) 1942 elif create_token.token_type in self.DB_CREATABLES: 1943 table_parts = self._parse_table_parts( 1944 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1945 ) 1946 1947 # exp.Properties.Location.POST_NAME 1948 self._match(TokenType.COMMA) 1949 extend_props(self._parse_properties(before=True)) 1950 1951 this = self._parse_schema(this=table_parts) 1952 1953 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1954 extend_props(self._parse_properties()) 1955 1956 self._match(TokenType.ALIAS) 1957 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1958 # exp.Properties.Location.POST_ALIAS 1959 extend_props(self._parse_properties()) 1960 1961 if create_token.token_type == TokenType.SEQUENCE: 1962 expression = self._parse_types() 1963 extend_props(self._parse_properties()) 1964 else: 1965 expression = self._parse_ddl_select() 1966 1967 if create_token.token_type == TokenType.TABLE: 1968 # exp.Properties.Location.POST_EXPRESSION 1969 extend_props(self._parse_properties()) 1970 1971 indexes = [] 1972 while True: 1973 index = self._parse_index() 1974 1975 # exp.Properties.Location.POST_INDEX 1976 extend_props(self._parse_properties()) 1977 if not index: 1978 break 1979 else: 1980 self._match(TokenType.COMMA) 1981 indexes.append(index) 1982 elif create_token.token_type == TokenType.VIEW: 1983 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1984 no_schema_binding = True 1985 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 1986 extend_props(self._parse_properties()) 1987 1988 shallow = self._match_text_seq("SHALLOW") 1989 1990 if self._match_texts(self.CLONE_KEYWORDS): 1991 copy = self._prev.text.lower() == "copy" 1992 clone = self.expression( 1993 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1994 ) 1995 1996 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1997 return self._parse_as_command(start) 1998 1999 create_kind_text = create_token.text.upper() 2000 return self.expression( 2001 exp.Create, 2002 this=this, 2003 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2004 replace=replace, 2005 refresh=refresh, 2006 unique=unique, 2007 expression=expression,
2008 exists=exists, 2009 properties=properties, 2010 indexes=indexes, 2011 no_schema_binding=no_schema_binding, 2012 begin=begin, 2013 end=end, 2014 clone=clone, 2015 concurrently=concurrently, 2016 clustered=clustered, 2017 ) 2018 2019 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2020 seq = exp.SequenceProperties() 2021 2022 options = [] 2023 index = self._index 2024 2025 while self._curr: 2026 self._match(TokenType.COMMA) 2027 if self._match_text_seq("INCREMENT"): 2028 self._match_text_seq("BY") 2029 self._match_text_seq("=") 2030 seq.set("increment", self._parse_term()) 2031 elif self._match_text_seq("MINVALUE"): 2032 seq.set("minvalue", self._parse_term()) 2033 elif self._match_text_seq("MAXVALUE"): 2034 seq.set("maxvalue", self._parse_term()) 2035 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2036 self._match_text_seq("=") 2037 seq.set("start", self._parse_term()) 2038 elif self._match_text_seq("CACHE"): 2039 # T-SQL allows empty CACHE which is initialized dynamically 2040 seq.set("cache", self._parse_number() or True) 2041 elif self._match_text_seq("OWNED", "BY"): 2042 # "OWNED BY NONE" is the default 2043 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2044 else: 2045 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2046 if opt: 2047 options.append(opt) 2048 else: 2049 break 2050 2051 seq.set("options", options if options else None) 2052 return None if self._index == index else seq 2053 2054 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2055 # only used for teradata currently 2056 self._match(TokenType.COMMA) 2057 2058 kwargs = { 2059 "no": self._match_text_seq("NO"), 2060 "dual": self._match_text_seq("DUAL"), 2061 "before": self._match_text_seq("BEFORE"), 2062 "default": self._match_text_seq("DEFAULT"), 2063 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2064 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2065 "after": self._match_text_seq("AFTER"), 2066 "minimum": self._match_texts(("MIN", "MINIMUM")), 2067 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2068 } 2069 2070 if self._match_texts(self.PROPERTY_PARSERS): 2071 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2072 try: 2073 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2074 except TypeError: 2075 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2076 2077 return None 2078 2079 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2080 return self._parse_wrapped_csv(self._parse_property) 2081 2082 def _parse_property(self) -> t.Optional[exp.Expression]: 2083 if self._match_texts(self.PROPERTY_PARSERS): 2084 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2085 2086 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2087 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2088 2089 if self._match_text_seq("COMPOUND", "SORTKEY"): 2090 return self._parse_sortkey(compound=True) 2091 2092 if self._match_text_seq("SQL", "SECURITY"): 2093 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2094 2095 index = self._index 2096 key = self._parse_column() 2097 2098 if not self._match(TokenType.EQ): 2099 self._retreat(index) 2100 return self._parse_sequence_properties() 2101 2102 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2103 if isinstance(key, exp.Column): 2104 key = 
key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2105 2106 value = self._parse_bitwise() or self._parse_var(any_token=True) 2107 2108 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2109 if isinstance(value, exp.Column): 2110 value = exp.var(value.name) 2111 2112 return self.expression(exp.Property, this=key, value=value) 2113 2114 def _parse_stored(self) -> exp.FileFormatProperty: 2115 self._match(TokenType.ALIAS) 2116 2117 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2118 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2119 2120 return self.expression( 2121 exp.FileFormatProperty, 2122 this=( 2123 self.expression( 2124 exp.InputOutputFormat, input_format=input_format, output_format=output_format 2125 ) 2126 if input_format or output_format 2127 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2128 ), 2129 ) 2130 2131 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2132 field = self._parse_field() 2133 if isinstance(field, exp.Identifier) and not field.quoted: 2134 field = exp.var(field) 2135 2136 return field 2137 2138 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2139 self._match(TokenType.EQ) 2140 self._match(TokenType.ALIAS) 2141 2142 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2143 2144 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2145 properties = [] 2146 while True: 2147 if before: 2148 prop = self._parse_property_before() 2149 else: 2150 prop = self._parse_property() 2151 if not prop: 2152 break 2153 for p in ensure_list(prop): 2154 properties.append(p) 2155 2156 if properties: 2157 return self.expression(exp.Properties, expressions=properties) 2158 2159 return None 2160 2161 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2162 return self.expression( 2163 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2164 ) 2165 2166 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2167 if self._match_texts(("DEFINER", "INVOKER")): 2168 security_specifier = self._prev.text.upper() 2169 return self.expression(exp.SecurityProperty, this=security_specifier) 2170 return None 2171 2172 def _parse_settings_property(self) -> exp.SettingsProperty: 2173 return self.expression( 2174 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2175 ) 2176 2177 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2178 if self._index >= 2: 2179 pre_volatile_token = self._tokens[self._index - 2] 2180 else: 2181 pre_volatile_token = None 2182 2183 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2184 return exp.VolatileProperty() 2185 2186 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2187 2188 def _parse_retention_period(self) -> exp.Var: 2189 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2190 number = self._parse_number() 2191 number_str = f"{number} " if number else "" 2192 unit = self._parse_var(any_token=True) 2193 return exp.var(f"{number_str}{unit}") 2194 2195 def _parse_system_versioning_property( 2196 self, with_: bool = False 2197 ) -> exp.WithSystemVersioningProperty: 2198 self._match(TokenType.EQ) 2199 prop = self.expression( 2200 exp.WithSystemVersioningProperty, 2201 **{ # type: ignore 2202 "on": 
True, 2203 "with": with_, 2204 }, 2205 ) 2206 2207 if self._match_text_seq("OFF"): 2208 prop.set("on", False) 2209 return prop 2210 2211 self._match(TokenType.ON) 2212 if self._match(TokenType.L_PAREN): 2213 while self._curr and not self._match(TokenType.R_PAREN): 2214 if self._match_text_seq("HISTORY_TABLE", "="): 2215 prop.set("this", self._parse_table_parts()) 2216 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2217 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2218 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2219 prop.set("retention_period", self._parse_retention_period()) 2220 2221 self._match(TokenType.COMMA) 2222 2223 return prop 2224 2225 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2226 self._match(TokenType.EQ) 2227 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2228 prop = self.expression(exp.DataDeletionProperty, on=on) 2229 2230 if self._match(TokenType.L_PAREN): 2231 while self._curr and not self._match(TokenType.R_PAREN): 2232 if self._match_text_seq("FILTER_COLUMN", "="): 2233 prop.set("filter_column", self._parse_column()) 2234 elif self._match_text_seq("RETENTION_PERIOD", "="): 2235 prop.set("retention_period", self._parse_retention_period()) 2236 2237 self._match(TokenType.COMMA) 2238 2239 return prop 2240 2241 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2242 kind = "HASH" 2243 expressions: t.Optional[t.List[exp.Expression]] = None 2244 if self._match_text_seq("BY", "HASH"): 2245 expressions = self._parse_wrapped_csv(self._parse_id_var) 2246 elif self._match_text_seq("BY", "RANDOM"): 2247 kind = "RANDOM" 2248 2249 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2250 buckets: t.Optional[exp.Expression] = None 2251 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2252 buckets = self._parse_number() 2253 2254 return self.expression( 2255 exp.DistributedByProperty, 2256 expressions=expressions, 2257 kind=kind, 2258 buckets=buckets, 2259 order=self._parse_order(), 2260 ) 2261 2262 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2263 self._match_text_seq("KEY") 2264 expressions = self._parse_wrapped_id_vars() 2265 return self.expression(expr_type, expressions=expressions) 2266 2267 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2268 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2269 prop = self._parse_system_versioning_property(with_=True) 2270 self._match_r_paren() 2271 return prop 2272 2273 if self._match(TokenType.L_PAREN, advance=False): 2274 return self._parse_wrapped_properties() 2275 2276 if self._match_text_seq("JOURNAL"): 2277 return self._parse_withjournaltable() 2278 2279 if self._match_texts(self.VIEW_ATTRIBUTES): 2280 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2281 2282 if self._match_text_seq("DATA"): 2283 return self._parse_withdata(no=False) 2284 elif self._match_text_seq("NO", "DATA"): 2285 return self._parse_withdata(no=True) 2286 2287 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2288 return self._parse_serde_properties(with_=True) 2289 2290 if self._match(TokenType.SCHEMA): 2291 return self.expression( 2292 exp.WithSchemaBindingProperty, 2293 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2294 ) 2295 2296 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2297 return self.expression( 2298 exp.WithProcedureOptions, 
expressions=self._parse_csv(self._parse_procedure_option) 2299 ) 2300 2301 if not self._next: 2302 return None 2303 2304 return self._parse_withisolatedloading() 2305 2306 def _parse_procedure_option(self) -> exp.Expression | None: 2307 if self._match_text_seq("EXECUTE", "AS"): 2308 return self.expression( 2309 exp.ExecuteAsProperty, 2310 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2311 or self._parse_string(), 2312 ) 2313 2314 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2315 2316 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2317 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2318 self._match(TokenType.EQ) 2319 2320 user = self._parse_id_var() 2321 self._match(TokenType.PARAMETER) 2322 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2323 2324 if not user or not host: 2325 return None 2326 2327 return exp.DefinerProperty(this=f"{user}@{host}") 2328 2329 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2330 self._match(TokenType.TABLE) 2331 self._match(TokenType.EQ) 2332 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2333 2334 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2335 return self.expression(exp.LogProperty, no=no) 2336 2337 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2338 return self.expression(exp.JournalProperty, **kwargs) 2339 2340 def _parse_checksum(self) -> exp.ChecksumProperty: 2341 self._match(TokenType.EQ) 2342 2343 on = None 2344 if self._match(TokenType.ON): 2345 on = True 2346 elif self._match_text_seq("OFF"): 2347 on = False 2348 2349 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2350 2351 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2352 return self.expression( 2353 exp.Cluster, 2354 expressions=( 2355 self._parse_wrapped_csv(self._parse_ordered) 2356 if wrapped 2357 else self._parse_csv(self._parse_ordered) 2358 ), 2359 ) 2360 2361 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2362 self._match_text_seq("BY") 2363 2364 self._match_l_paren() 2365 expressions = self._parse_csv(self._parse_column) 2366 self._match_r_paren() 2367 2368 if self._match_text_seq("SORTED", "BY"): 2369 self._match_l_paren() 2370 sorted_by = self._parse_csv(self._parse_ordered) 2371 self._match_r_paren() 2372 else: 2373 sorted_by = None 2374 2375 self._match(TokenType.INTO) 2376 buckets = self._parse_number() 2377 self._match_text_seq("BUCKETS") 2378 2379 return self.expression( 2380 exp.ClusteredByProperty, 2381 expressions=expressions, 2382 sorted_by=sorted_by, 2383 buckets=buckets, 2384 ) 2385 2386 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2387 if not self._match_text_seq("GRANTS"): 2388 self._retreat(self._index - 1) 2389 return None 2390 2391 return self.expression(exp.CopyGrantsProperty) 2392 2393 def _parse_freespace(self) -> exp.FreespaceProperty: 2394 self._match(TokenType.EQ) 2395 return self.expression( 2396 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2397 ) 2398 2399 def _parse_mergeblockratio( 2400 self, no: bool = False, default: bool = False 2401 ) -> exp.MergeBlockRatioProperty: 2402 if self._match(TokenType.EQ): 2403 return self.expression( 2404 exp.MergeBlockRatioProperty, 2405 this=self._parse_number(), 2406 percent=self._match(TokenType.PERCENT), 2407 ) 2408 2409 return self.expression(exp.MergeBlockRatioProperty, no=no, 
default=default) 2410 2411 def _parse_datablocksize( 2412 self, 2413 default: t.Optional[bool] = None, 2414 minimum: t.Optional[bool] = None, 2415 maximum: t.Optional[bool] = None, 2416 ) -> exp.DataBlocksizeProperty: 2417 self._match(TokenType.EQ) 2418 size = self._parse_number() 2419 2420 units = None 2421 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2422 units = self._prev.text 2423 2424 return self.expression( 2425 exp.DataBlocksizeProperty, 2426 size=size, 2427 units=units, 2428 default=default, 2429 minimum=minimum, 2430 maximum=maximum, 2431 ) 2432 2433 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2434 self._match(TokenType.EQ) 2435 always = self._match_text_seq("ALWAYS") 2436 manual = self._match_text_seq("MANUAL") 2437 never = self._match_text_seq("NEVER") 2438 default = self._match_text_seq("DEFAULT") 2439 2440 autotemp = None 2441 if self._match_text_seq("AUTOTEMP"): 2442 autotemp = self._parse_schema() 2443 2444 return self.expression( 2445 exp.BlockCompressionProperty, 2446 always=always, 2447 manual=manual, 2448 never=never, 2449 default=default, 2450 autotemp=autotemp, 2451 ) 2452 2453 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2454 index = self._index 2455 no = self._match_text_seq("NO") 2456 concurrent = self._match_text_seq("CONCURRENT") 2457 2458 if not self._match_text_seq("ISOLATED", "LOADING"): 2459 self._retreat(index) 2460 return None 2461 2462 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2463 return self.expression( 2464 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2465 ) 2466 2467 def _parse_locking(self) -> exp.LockingProperty: 2468 if self._match(TokenType.TABLE): 2469 kind = "TABLE" 2470 elif self._match(TokenType.VIEW): 2471 kind = "VIEW" 2472 elif self._match(TokenType.ROW): 2473 kind = "ROW" 2474 elif self._match_text_seq("DATABASE"): 2475 kind = "DATABASE" 2476 else: 2477 kind = None 2478 2479 if kind in ("DATABASE", "TABLE", "VIEW"): 2480 this = self._parse_table_parts() 2481 else: 2482 this = None 2483 2484 if self._match(TokenType.FOR): 2485 for_or_in = "FOR" 2486 elif self._match(TokenType.IN): 2487 for_or_in = "IN" 2488 else: 2489 for_or_in = None 2490 2491 if self._match_text_seq("ACCESS"): 2492 lock_type = "ACCESS" 2493 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2494 lock_type = "EXCLUSIVE" 2495 elif self._match_text_seq("SHARE"): 2496 lock_type = "SHARE" 2497 elif self._match_text_seq("READ"): 2498 lock_type = "READ" 2499 elif self._match_text_seq("WRITE"): 2500 lock_type = "WRITE" 2501 elif self._match_text_seq("CHECKSUM"): 2502 lock_type = "CHECKSUM" 2503 else: 2504 lock_type = None 2505 2506 override = self._match_text_seq("OVERRIDE") 2507 2508 return self.expression( 2509 exp.LockingProperty, 2510 this=this, 2511 kind=kind, 2512 for_or_in=for_or_in, 2513 lock_type=lock_type, 2514 override=override, 2515 ) 2516 2517 def _parse_partition_by(self) -> t.List[exp.Expression]: 2518 if self._match(TokenType.PARTITION_BY): 2519 return self._parse_csv(self._parse_assignment) 2520 return [] 2521 2522 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2523 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2524 if self._match_text_seq("MINVALUE"): 2525 return exp.var("MINVALUE") 2526 if self._match_text_seq("MAXVALUE"): 2527 return exp.var("MAXVALUE") 2528 return self._parse_bitwise() 2529 2530 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2531 expression = None 
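# Illustrative examples of the PostgreSQL partition-bound shapes accepted
# below (IN, FROM ... TO with MINVALUE/MAXVALUE, and WITH (MODULUS ...,
# REMAINDER ...)). Table names are invented.
# >>> import sqlglot
# >>> sqlglot.parse_one("CREATE TABLE p1 PARTITION OF t FOR VALUES FROM (MINVALUE) TO (100)", read="postgres")
# >>> sqlglot.parse_one("CREATE TABLE p2 PARTITION OF t FOR VALUES WITH (MODULUS 4, REMAINDER 0)", read="postgres")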
2532 from_expressions = None 2533 to_expressions = None 2534 2535 if self._match(TokenType.IN): 2536 this = self._parse_wrapped_csv(self._parse_bitwise) 2537 elif self._match(TokenType.FROM): 2538 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2539 self._match_text_seq("TO") 2540 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2541 elif self._match_text_seq("WITH", "(", "MODULUS"): 2542 this = self._parse_number() 2543 self._match_text_seq(",", "REMAINDER") 2544 expression = self._parse_number() 2545 self._match_r_paren() 2546 else: 2547 self.raise_error("Failed to parse partition bound spec.") 2548 2549 return self.expression( 2550 exp.PartitionBoundSpec, 2551 this=this, 2552 expression=expression, 2553 from_expressions=from_expressions, 2554 to_expressions=to_expressions, 2555 ) 2556 2557 # https://www.postgresql.org/docs/current/sql-createtable.html 2558 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2559 if not self._match_text_seq("OF"): 2560 self._retreat(self._index - 1) 2561 return None 2562 2563 this = self._parse_table(schema=True) 2564 2565 if self._match(TokenType.DEFAULT): 2566 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2567 elif self._match_text_seq("FOR", "VALUES"): 2568 expression = self._parse_partition_bound_spec() 2569 else: 2570 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2571 2572 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2573 2574 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2575 self._match(TokenType.EQ) 2576 return self.expression( 2577 exp.PartitionedByProperty, 2578 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2579 ) 2580 2581 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2582 if self._match_text_seq("AND", "STATISTICS"): 2583 statistics = True 2584 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2585 statistics = False 2586 else: 2587 statistics = None 2588 2589 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2590 2591 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2592 if self._match_text_seq("SQL"): 2593 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2594 return None 2595 2596 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2597 if self._match_text_seq("SQL", "DATA"): 2598 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2599 return None 2600 2601 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2602 if self._match_text_seq("PRIMARY", "INDEX"): 2603 return exp.NoPrimaryIndexProperty() 2604 if self._match_text_seq("SQL"): 2605 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2606 return None 2607 2608 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2609 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2610 return exp.OnCommitProperty() 2611 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2612 return exp.OnCommitProperty(delete=True) 2613 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2614 2615 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2616 if self._match_text_seq("SQL", "DATA"): 2617 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2618 return None 2619 2620 def _parse_distkey(self) -> exp.DistKeyProperty: 2621 return self.expression(exp.DistKeyProperty, 
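# Illustrative example: Redshift's DISTKEY, which this method wraps in a
# DistKeyProperty. The column name is invented.
# >>> import sqlglot
# >>> sqlglot.parse_one("CREATE TABLE t (a INT) DISTKEY(a)", read="redshift").find(sqlglot.exp.DistKeyProperty)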
this=self._parse_wrapped(self._parse_id_var)) 2622 2623 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2624 table = self._parse_table(schema=True) 2625 2626 options = [] 2627 while self._match_texts(("INCLUDING", "EXCLUDING")): 2628 this = self._prev.text.upper() 2629 2630 id_var = self._parse_id_var() 2631 if not id_var: 2632 return None 2633 2634 options.append( 2635 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2636 ) 2637 2638 return self.expression(exp.LikeProperty, this=table, expressions=options) 2639 2640 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2641 return self.expression( 2642 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2643 ) 2644 2645 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2646 self._match(TokenType.EQ) 2647 return self.expression( 2648 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2649 ) 2650 2651 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2652 self._match_text_seq("WITH", "CONNECTION") 2653 return self.expression( 2654 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2655 ) 2656 2657 def _parse_returns(self) -> exp.ReturnsProperty: 2658 value: t.Optional[exp.Expression] 2659 null = None 2660 is_table = self._match(TokenType.TABLE) 2661 2662 if is_table: 2663 if self._match(TokenType.LT): 2664 value = self.expression( 2665 exp.Schema, 2666 this="TABLE", 2667 expressions=self._parse_csv(self._parse_struct_types), 2668 ) 2669 if not self._match(TokenType.GT): 2670 self.raise_error("Expecting >") 2671 else: 2672 value = self._parse_schema(exp.var("TABLE")) 2673 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2674 null = True 2675 value = None 2676 else: 2677 value = self._parse_types() 2678 2679 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2680 2681 def _parse_describe(self) -> exp.Describe: 2682 kind = self._match_set(self.CREATABLES) and self._prev.text 2683 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2684 if self._match(TokenType.DOT): 2685 style = None 2686 self._retreat(self._index - 2) 2687 2688 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2689 2690 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2691 this = self._parse_statement() 2692 else: 2693 this = self._parse_table(schema=True) 2694 2695 properties = self._parse_properties() 2696 expressions = properties.expressions if properties else None 2697 partition = self._parse_partition() 2698 return self.expression( 2699 exp.Describe, 2700 this=this, 2701 style=style, 2702 kind=kind, 2703 expressions=expressions, 2704 partition=partition, 2705 format=format, 2706 ) 2707 2708 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2709 kind = self._prev.text.upper() 2710 expressions = [] 2711 2712 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2713 if self._match(TokenType.WHEN): 2714 expression = self._parse_disjunction() 2715 self._match(TokenType.THEN) 2716 else: 2717 expression = None 2718 2719 else_ = self._match(TokenType.ELSE) 2720 2721 if not self._match(TokenType.INTO): 2722 return None 2723 2724 return self.expression( 2725 exp.ConditionalInsert, 2726 this=self.expression( 2727 exp.Insert, 2728 this=self._parse_table(schema=True), 2729 
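# Illustrative sketch of input the surrounding method handles: an Oracle-style
# multi-table INSERT. Table and column names are invented and exact dialect
# support may vary.
# >>> import sqlglot
# >>> sql = "INSERT ALL WHEN x > 0 THEN INTO pos VALUES (x) ELSE INTO neg VALUES (x) SELECT x FROM src"
# >>> sqlglot.parse_one(sql, read="oracle")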
expression=self._parse_derived_table_values(), 2730 ), 2731 expression=expression, 2732 else_=else_, 2733 ) 2734 2735 expression = parse_conditional_insert() 2736 while expression is not None: 2737 expressions.append(expression) 2738 expression = parse_conditional_insert() 2739 2740 return self.expression( 2741 exp.MultitableInserts, 2742 kind=kind, 2743 comments=comments, 2744 expressions=expressions, 2745 source=self._parse_table(), 2746 ) 2747 2748 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2749 comments = [] 2750 hint = self._parse_hint() 2751 overwrite = self._match(TokenType.OVERWRITE) 2752 ignore = self._match(TokenType.IGNORE) 2753 local = self._match_text_seq("LOCAL") 2754 alternative = None 2755 is_function = None 2756 2757 if self._match_text_seq("DIRECTORY"): 2758 this: t.Optional[exp.Expression] = self.expression( 2759 exp.Directory, 2760 this=self._parse_var_or_string(), 2761 local=local, 2762 row_format=self._parse_row_format(match_row=True), 2763 ) 2764 else: 2765 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2766 comments += ensure_list(self._prev_comments) 2767 return self._parse_multitable_inserts(comments) 2768 2769 if self._match(TokenType.OR): 2770 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2771 2772 self._match(TokenType.INTO) 2773 comments += ensure_list(self._prev_comments) 2774 self._match(TokenType.TABLE) 2775 is_function = self._match(TokenType.FUNCTION) 2776 2777 this = ( 2778 self._parse_table(schema=True, parse_partition=True) 2779 if not is_function 2780 else self._parse_function() 2781 ) 2782 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2783 this.set("alias", self._parse_table_alias()) 2784 2785 returning = self._parse_returning() 2786 2787 return self.expression( 2788 exp.Insert, 2789 comments=comments, 2790 hint=hint, 2791 is_function=is_function, 2792 this=this, 2793 stored=self._match_text_seq("STORED") and self._parse_stored(), 2794 by_name=self._match_text_seq("BY", "NAME"), 2795 exists=self._parse_exists(), 2796 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2797 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2798 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2799 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2800 conflict=self._parse_on_conflict(), 2801 returning=returning or self._parse_returning(), 2802 overwrite=overwrite, 2803 alternative=alternative, 2804 ignore=ignore, 2805 source=self._match(TokenType.TABLE) and self._parse_table(), 2806 ) 2807 2808 def _parse_kill(self) -> exp.Kill: 2809 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2810 2811 return self.expression( 2812 exp.Kill, 2813 this=self._parse_primary(), 2814 kind=kind, 2815 ) 2816 2817 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2818 conflict = self._match_text_seq("ON", "CONFLICT") 2819 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2820 2821 if not conflict and not duplicate: 2822 return None 2823 2824 conflict_keys = None 2825 constraint = None 2826 2827 if conflict: 2828 if self._match_text_seq("ON", "CONSTRAINT"): 2829 constraint = self._parse_id_var() 2830 elif self._match(TokenType.L_PAREN): 2831 conflict_keys = self._parse_csv(self._parse_id_var) 2832 self._match_r_paren() 2833 2834 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2835 if 
self._prev.token_type == TokenType.UPDATE: 2836 self._match(TokenType.SET) 2837 expressions = self._parse_csv(self._parse_equality) 2838 else: 2839 expressions = None 2840 2841 return self.expression( 2842 exp.OnConflict, 2843 duplicate=duplicate, 2844 expressions=expressions, 2845 action=action, 2846 conflict_keys=conflict_keys, 2847 constraint=constraint, 2848 where=self._parse_where(), 2849 ) 2850 2851 def _parse_returning(self) -> t.Optional[exp.Returning]: 2852 if not self._match(TokenType.RETURNING): 2853 return None 2854 return self.expression( 2855 exp.Returning, 2856 expressions=self._parse_csv(self._parse_expression), 2857 into=self._match(TokenType.INTO) and self._parse_table_part(), 2858 ) 2859 2860 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2861 if not self._match(TokenType.FORMAT): 2862 return None 2863 return self._parse_row_format() 2864 2865 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2866 index = self._index 2867 with_ = with_ or self._match_text_seq("WITH") 2868 2869 if not self._match(TokenType.SERDE_PROPERTIES): 2870 self._retreat(index) 2871 return None 2872 return self.expression( 2873 exp.SerdeProperties, 2874 **{ # type: ignore 2875 "expressions": self._parse_wrapped_properties(), 2876 "with": with_, 2877 }, 2878 ) 2879 2880 def _parse_row_format( 2881 self, match_row: bool = False 2882 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2883 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2884 return None 2885 2886 if self._match_text_seq("SERDE"): 2887 this = self._parse_string() 2888 2889 serde_properties = self._parse_serde_properties() 2890 2891 return self.expression( 2892 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2893 ) 2894 2895 self._match_text_seq("DELIMITED") 2896 2897 kwargs = {} 2898 2899 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2900 kwargs["fields"] = self._parse_string() 2901 if self._match_text_seq("ESCAPED", "BY"): 2902 kwargs["escaped"] = self._parse_string() 2903 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2904 kwargs["collection_items"] = self._parse_string() 2905 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2906 kwargs["map_keys"] = self._parse_string() 2907 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2908 kwargs["lines"] = self._parse_string() 2909 if self._match_text_seq("NULL", "DEFINED", "AS"): 2910 kwargs["null"] = self._parse_string() 2911 2912 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2913 2914 def _parse_load(self) -> exp.LoadData | exp.Command: 2915 if self._match_text_seq("DATA"): 2916 local = self._match_text_seq("LOCAL") 2917 self._match_text_seq("INPATH") 2918 inpath = self._parse_string() 2919 overwrite = self._match(TokenType.OVERWRITE) 2920 self._match_pair(TokenType.INTO, TokenType.TABLE) 2921 2922 return self.expression( 2923 exp.LoadData, 2924 this=self._parse_table(schema=True), 2925 local=local, 2926 overwrite=overwrite, 2927 inpath=inpath, 2928 partition=self._parse_partition(), 2929 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2930 serde=self._match_text_seq("SERDE") and self._parse_string(), 2931 ) 2932 return self._parse_as_command(self._prev) 2933 2934 def _parse_delete(self) -> exp.Delete: 2935 # This handles MySQL's "Multiple-Table Syntax" 2936 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2937 tables 
= None 2938 if not self._match(TokenType.FROM, advance=False): 2939 tables = self._parse_csv(self._parse_table) or None 2940 2941 returning = self._parse_returning() 2942 2943 return self.expression( 2944 exp.Delete, 2945 tables=tables, 2946 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2947 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2948 cluster=self._match(TokenType.ON) and self._parse_on_property(), 2949 where=self._parse_where(), 2950 returning=returning or self._parse_returning(), 2951 limit=self._parse_limit(), 2952 ) 2953 2954 def _parse_update(self) -> exp.Update: 2955 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2956 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2957 returning = self._parse_returning() 2958 return self.expression( 2959 exp.Update, 2960 **{ # type: ignore 2961 "this": this, 2962 "expressions": expressions, 2963 "from": self._parse_from(joins=True), 2964 "where": self._parse_where(), 2965 "returning": returning or self._parse_returning(), 2966 "order": self._parse_order(), 2967 "limit": self._parse_limit(), 2968 }, 2969 ) 2970 2971 def _parse_uncache(self) -> exp.Uncache: 2972 if not self._match(TokenType.TABLE): 2973 self.raise_error("Expecting TABLE after UNCACHE") 2974 2975 return self.expression( 2976 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2977 ) 2978 2979 def _parse_cache(self) -> exp.Cache: 2980 lazy = self._match_text_seq("LAZY") 2981 self._match(TokenType.TABLE) 2982 table = self._parse_table(schema=True) 2983 2984 options = [] 2985 if self._match_text_seq("OPTIONS"): 2986 self._match_l_paren() 2987 k = self._parse_string() 2988 self._match(TokenType.EQ) 2989 v = self._parse_string() 2990 options = [k, v] 2991 self._match_r_paren() 2992 2993 self._match(TokenType.ALIAS) 2994 return self.expression( 2995 exp.Cache, 2996 this=table, 2997 lazy=lazy, 2998 options=options, 2999 expression=self._parse_select(nested=True), 3000 ) 3001 3002 def _parse_partition(self) -> t.Optional[exp.Partition]: 3003 if not self._match_texts(self.PARTITION_KEYWORDS): 3004 return None 3005 3006 return self.expression( 3007 exp.Partition, 3008 subpartition=self._prev.text.upper() == "SUBPARTITION", 3009 expressions=self._parse_wrapped_csv(self._parse_assignment), 3010 ) 3011 3012 def _parse_value(self) -> t.Optional[exp.Tuple]: 3013 def _parse_value_expression() -> t.Optional[exp.Expression]: 3014 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3015 return exp.var(self._prev.text.upper()) 3016 return self._parse_expression() 3017 3018 if self._match(TokenType.L_PAREN): 3019 expressions = self._parse_csv(_parse_value_expression) 3020 self._match_r_paren() 3021 return self.expression(exp.Tuple, expressions=expressions) 3022 3023 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
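# Illustrative example: each parenthesized row becomes an exp.Tuple, so a
# two-row VALUES clause parses roughly as Values(expressions=[Tuple, Tuple]).
# >>> import sqlglot
# >>> sqlglot.parse_one("INSERT INTO t VALUES (1, 2), (3, 4)").expression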
3024 expression = self._parse_expression() 3025 if expression: 3026 return self.expression(exp.Tuple, expressions=[expression]) 3027 return None 3028 3029 def _parse_projections(self) -> t.List[exp.Expression]: 3030 return self._parse_expressions() 3031 3032 def _parse_select( 3033 self, 3034 nested: bool = False, 3035 table: bool = False, 3036 parse_subquery_alias: bool = True, 3037 parse_set_operation: bool = True, 3038 ) -> t.Optional[exp.Expression]: 3039 cte = self._parse_with() 3040 3041 if cte: 3042 this = self._parse_statement() 3043 3044 if not this: 3045 self.raise_error("Failed to parse any statement following CTE") 3046 return cte 3047 3048 if "with" in this.arg_types: 3049 this.set("with", cte) 3050 else: 3051 self.raise_error(f"{this.key} does not support CTE") 3052 this = cte 3053 3054 return this 3055 3056 # duckdb supports leading with FROM x 3057 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 3058 3059 if self._match(TokenType.SELECT): 3060 comments = self._prev_comments 3061 3062 hint = self._parse_hint() 3063 3064 if self._next and not self._next.token_type == TokenType.DOT: 3065 all_ = self._match(TokenType.ALL) 3066 distinct = self._match_set(self.DISTINCT_TOKENS) 3067 else: 3068 all_, distinct = None, None 3069 3070 kind = ( 3071 self._match(TokenType.ALIAS) 3072 and self._match_texts(("STRUCT", "VALUE")) 3073 and self._prev.text.upper() 3074 ) 3075 3076 if distinct: 3077 distinct = self.expression( 3078 exp.Distinct, 3079 on=self._parse_value() if self._match(TokenType.ON) else None, 3080 ) 3081 3082 if all_ and distinct: 3083 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3084 3085 operation_modifiers = [] 3086 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3087 operation_modifiers.append(exp.var(self._prev.text.upper())) 3088 3089 limit = self._parse_limit(top=True) 3090 projections = self._parse_projections() 3091 3092 this = self.expression( 3093 exp.Select, 3094 kind=kind, 3095 hint=hint, 3096 distinct=distinct, 3097 expressions=projections, 3098 limit=limit, 3099 operation_modifiers=operation_modifiers or None, 3100 ) 3101 this.comments = comments 3102 3103 into = self._parse_into() 3104 if into: 3105 this.set("into", into) 3106 3107 if not from_: 3108 from_ = self._parse_from() 3109 3110 if from_: 3111 this.set("from", from_) 3112 3113 this = self._parse_query_modifiers(this) 3114 elif (table or nested) and self._match(TokenType.L_PAREN): 3115 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3116 this = self._parse_simplified_pivot( 3117 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3118 ) 3119 elif self._match(TokenType.FROM): 3120 from_ = self._parse_from(skip_from_token=True) 3121 # Support parentheses for duckdb FROM-first syntax 3122 select = self._parse_select() 3123 if select: 3124 select.set("from", from_) 3125 this = select 3126 else: 3127 this = exp.select("*").from_(t.cast(exp.From, from_)) 3128 else: 3129 this = ( 3130 self._parse_table() 3131 if table 3132 else self._parse_select(nested=True, parse_set_operation=False) 3133 ) 3134 3135 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3136 # in case a modifier (e.g. 
join) is following 3137 if table and isinstance(this, exp.Values) and this.alias: 3138 alias = this.args["alias"].pop() 3139 this = exp.Table(this=this, alias=alias) 3140 3141 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3142 3143 self._match_r_paren() 3144 3145 # We return early here so that the UNION isn't attached to the subquery by the 3146 # following call to _parse_set_operations, but instead becomes the parent node 3147 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3148 elif self._match(TokenType.VALUES, advance=False): 3149 this = self._parse_derived_table_values() 3150 elif from_: 3151 this = exp.select("*").from_(from_.this, copy=False) 3152 elif self._match(TokenType.SUMMARIZE): 3153 table = self._match(TokenType.TABLE) 3154 this = self._parse_select() or self._parse_string() or self._parse_table() 3155 return self.expression(exp.Summarize, this=this, table=table) 3156 elif self._match(TokenType.DESCRIBE): 3157 this = self._parse_describe() 3158 elif self._match_text_seq("STREAM"): 3159 this = self._parse_function() 3160 if this: 3161 this = self.expression(exp.Stream, this=this) 3162 else: 3163 self._retreat(self._index - 1) 3164 else: 3165 this = None 3166 3167 return self._parse_set_operations(this) if parse_set_operation else this 3168 3169 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3170 if not skip_with_token and not self._match(TokenType.WITH): 3171 return None 3172 3173 comments = self._prev_comments 3174 recursive = self._match(TokenType.RECURSIVE) 3175 3176 last_comments = None 3177 expressions = [] 3178 while True: 3179 cte = self._parse_cte() 3180 if isinstance(cte, exp.CTE): 3181 expressions.append(cte) 3182 if last_comments: 3183 cte.add_comments(last_comments) 3184 3185 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3186 break 3187 else: 3188 self._match(TokenType.WITH) 3189 3190 last_comments = self._prev_comments 3191 3192 return self.expression( 3193 exp.With, comments=comments, expressions=expressions, recursive=recursive 3194 ) 3195 3196 def _parse_cte(self) -> t.Optional[exp.CTE]: 3197 index = self._index 3198 3199 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3200 if not alias or not alias.this: 3201 self.raise_error("Expected CTE to have alias") 3202 3203 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3204 self._retreat(index) 3205 return None 3206 3207 comments = self._prev_comments 3208 3209 if self._match_text_seq("NOT", "MATERIALIZED"): 3210 materialized = False 3211 elif self._match_text_seq("MATERIALIZED"): 3212 materialized = True 3213 else: 3214 materialized = None 3215 3216 cte = self.expression( 3217 exp.CTE, 3218 this=self._parse_wrapped(self._parse_statement), 3219 alias=alias, 3220 materialized=materialized, 3221 comments=comments, 3222 ) 3223 3224 if isinstance(cte.this, exp.Values): 3225 cte.set("this", exp.select("*").from_(exp.alias_(cte.this, "_values", table=True))) 3226 3227 return cte 3228 3229 def _parse_table_alias( 3230 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3231 ) -> t.Optional[exp.TableAlias]: 3232 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3233 # so this section tries to parse the clause version and if it fails, it treats the token 3234 # as an identifier (alias) 3235 if self._can_parse_limit_or_offset(): 3236 return None 3237 3238 any_token = self._match(TokenType.ALIAS) 3239 alias = ( 3240 
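# Illustrative example for _parse_with/_parse_cte above: a WITH clause attaches
# an exp.With node (holding one exp.CTE here) to the SELECT that follows it.
# >>> import sqlglot
# >>> sqlglot.parse_one("WITH x AS (SELECT 1 AS a) SELECT a FROM x").args["with"]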
self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3241 or self._parse_string_as_identifier() 3242 ) 3243 3244 index = self._index 3245 if self._match(TokenType.L_PAREN): 3246 columns = self._parse_csv(self._parse_function_parameter) 3247 self._match_r_paren() if columns else self._retreat(index) 3248 else: 3249 columns = None 3250 3251 if not alias and not columns: 3252 return None 3253 3254 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3255 3256 # We bubble up comments from the Identifier to the TableAlias 3257 if isinstance(alias, exp.Identifier): 3258 table_alias.add_comments(alias.pop_comments()) 3259 3260 return table_alias 3261 3262 def _parse_subquery( 3263 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3264 ) -> t.Optional[exp.Subquery]: 3265 if not this: 3266 return None 3267 3268 return self.expression( 3269 exp.Subquery, 3270 this=this, 3271 pivots=self._parse_pivots(), 3272 alias=self._parse_table_alias() if parse_alias else None, 3273 sample=self._parse_table_sample(), 3274 ) 3275 3276 def _implicit_unnests_to_explicit(self, this: E) -> E: 3277 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3278 3279 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3280 for i, join in enumerate(this.args.get("joins") or []): 3281 table = join.this 3282 normalized_table = table.copy() 3283 normalized_table.meta["maybe_column"] = True 3284 normalized_table = _norm(normalized_table, dialect=self.dialect) 3285 3286 if isinstance(table, exp.Table) and not join.args.get("on"): 3287 if normalized_table.parts[0].name in refs: 3288 table_as_column = table.to_column() 3289 unnest = exp.Unnest(expressions=[table_as_column]) 3290 3291 # Table.to_column creates a parent Alias node that we want to convert to 3292 # a TableAlias and attach to the Unnest, so it matches the parser's output 3293 if isinstance(table.args.get("alias"), exp.TableAlias): 3294 table_as_column.replace(table_as_column.this) 3295 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3296 3297 table.replace(unnest) 3298 3299 refs.add(normalized_table.alias_or_name) 3300 3301 return this 3302 3303 def _parse_query_modifiers( 3304 self, this: t.Optional[exp.Expression] 3305 ) -> t.Optional[exp.Expression]: 3306 if isinstance(this, (exp.Query, exp.Table)): 3307 for join in self._parse_joins(): 3308 this.append("joins", join) 3309 for lateral in iter(self._parse_lateral, None): 3310 this.append("laterals", lateral) 3311 3312 while True: 3313 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3314 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3315 key, expression = parser(self) 3316 3317 if expression: 3318 this.set(key, expression) 3319 if key == "limit": 3320 offset = expression.args.pop("offset", None) 3321 3322 if offset: 3323 offset = exp.Offset(expression=offset) 3324 this.set("offset", offset) 3325 3326 limit_by_expressions = expression.expressions 3327 expression.set("expressions", None) 3328 offset.set("expressions", limit_by_expressions) 3329 continue 3330 break 3331 3332 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3333 this = self._implicit_unnests_to_explicit(this) 3334 3335 return this 3336 3337 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3338 start = self._curr 3339 while self._curr: 3340 self._advance() 3341 3342 end = self._tokens[self._index - 1] 3343 return 
exp.Hint(expressions=[self._find_sql(start, end)]) 3344 3345 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3346 return self._parse_function_call() 3347 3348 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3349 start_index = self._index 3350 should_fallback_to_string = False 3351 3352 hints = [] 3353 try: 3354 for hint in iter( 3355 lambda: self._parse_csv( 3356 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3357 ), 3358 [], 3359 ): 3360 hints.extend(hint) 3361 except ParseError: 3362 should_fallback_to_string = True 3363 3364 if should_fallback_to_string or self._curr: 3365 self._retreat(start_index) 3366 return self._parse_hint_fallback_to_string() 3367 3368 return self.expression(exp.Hint, expressions=hints) 3369 3370 def _parse_hint(self) -> t.Optional[exp.Hint]: 3371 if self._match(TokenType.HINT) and self._prev_comments: 3372 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3373 3374 return None 3375 3376 def _parse_into(self) -> t.Optional[exp.Into]: 3377 if not self._match(TokenType.INTO): 3378 return None 3379 3380 temp = self._match(TokenType.TEMPORARY) 3381 unlogged = self._match_text_seq("UNLOGGED") 3382 self._match(TokenType.TABLE) 3383 3384 return self.expression( 3385 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3386 ) 3387 3388 def _parse_from( 3389 self, joins: bool = False, skip_from_token: bool = False 3390 ) -> t.Optional[exp.From]: 3391 if not skip_from_token and not self._match(TokenType.FROM): 3392 return None 3393 3394 return self.expression( 3395 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3396 ) 3397 3398 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3399 return self.expression( 3400 exp.MatchRecognizeMeasure, 3401 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3402 this=self._parse_expression(), 3403 ) 3404 3405 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3406 if not self._match(TokenType.MATCH_RECOGNIZE): 3407 return None 3408 3409 self._match_l_paren() 3410 3411 partition = self._parse_partition_by() 3412 order = self._parse_order() 3413 3414 measures = ( 3415 self._parse_csv(self._parse_match_recognize_measure) 3416 if self._match_text_seq("MEASURES") 3417 else None 3418 ) 3419 3420 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3421 rows = exp.var("ONE ROW PER MATCH") 3422 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3423 text = "ALL ROWS PER MATCH" 3424 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3425 text += " SHOW EMPTY MATCHES" 3426 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3427 text += " OMIT EMPTY MATCHES" 3428 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3429 text += " WITH UNMATCHED ROWS" 3430 rows = exp.var(text) 3431 else: 3432 rows = None 3433 3434 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3435 text = "AFTER MATCH SKIP" 3436 if self._match_text_seq("PAST", "LAST", "ROW"): 3437 text += " PAST LAST ROW" 3438 elif self._match_text_seq("TO", "NEXT", "ROW"): 3439 text += " TO NEXT ROW" 3440 elif self._match_text_seq("TO", "FIRST"): 3441 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3442 elif self._match_text_seq("TO", "LAST"): 3443 text += f" TO LAST {self._advance_any().text}" # type: ignore 3444 after = exp.var(text) 3445 else: 3446 after = None 3447 3448 if self._match_text_seq("PATTERN"): 3449 self._match_l_paren() 3450 3451 if not 
self._curr: 3452 self.raise_error("Expecting )", self._curr) 3453 3454 paren = 1 3455 start = self._curr 3456 3457 while self._curr and paren > 0: 3458 if self._curr.token_type == TokenType.L_PAREN: 3459 paren += 1 3460 if self._curr.token_type == TokenType.R_PAREN: 3461 paren -= 1 3462 3463 end = self._prev 3464 self._advance() 3465 3466 if paren > 0: 3467 self.raise_error("Expecting )", self._curr) 3468 3469 pattern = exp.var(self._find_sql(start, end)) 3470 else: 3471 pattern = None 3472 3473 define = ( 3474 self._parse_csv(self._parse_name_as_expression) 3475 if self._match_text_seq("DEFINE") 3476 else None 3477 ) 3478 3479 self._match_r_paren() 3480 3481 return self.expression( 3482 exp.MatchRecognize, 3483 partition_by=partition, 3484 order=order, 3485 measures=measures, 3486 rows=rows, 3487 after=after, 3488 pattern=pattern, 3489 define=define, 3490 alias=self._parse_table_alias(), 3491 ) 3492 3493 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3494 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3495 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3496 cross_apply = False 3497 3498 if cross_apply is not None: 3499 this = self._parse_select(table=True) 3500 view = None 3501 outer = None 3502 elif self._match(TokenType.LATERAL): 3503 this = self._parse_select(table=True) 3504 view = self._match(TokenType.VIEW) 3505 outer = self._match(TokenType.OUTER) 3506 else: 3507 return None 3508 3509 if not this: 3510 this = ( 3511 self._parse_unnest() 3512 or self._parse_function() 3513 or self._parse_id_var(any_token=False) 3514 ) 3515 3516 while self._match(TokenType.DOT): 3517 this = exp.Dot( 3518 this=this, 3519 expression=self._parse_function() or self._parse_id_var(any_token=False), 3520 ) 3521 3522 if view: 3523 table = self._parse_id_var(any_token=False) 3524 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3525 table_alias: t.Optional[exp.TableAlias] = self.expression( 3526 exp.TableAlias, this=table, columns=columns 3527 ) 3528 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3529 # We move the alias from the lateral's child node to the lateral itself 3530 table_alias = this.args["alias"].pop() 3531 else: 3532 table_alias = self._parse_table_alias() 3533 3534 return self.expression( 3535 exp.Lateral, 3536 this=this, 3537 view=view, 3538 outer=outer, 3539 alias=table_alias, 3540 cross_apply=cross_apply, 3541 ) 3542 3543 def _parse_join_parts( 3544 self, 3545 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3546 return ( 3547 self._match_set(self.JOIN_METHODS) and self._prev, 3548 self._match_set(self.JOIN_SIDES) and self._prev, 3549 self._match_set(self.JOIN_KINDS) and self._prev, 3550 ) 3551 3552 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3553 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3554 this = self._parse_column() 3555 if isinstance(this, exp.Column): 3556 return this.this 3557 return this 3558 3559 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3560 3561 def _parse_join( 3562 self, skip_join_token: bool = False, parse_bracket: bool = False 3563 ) -> t.Optional[exp.Join]: 3564 if self._match(TokenType.COMMA): 3565 return self.expression(exp.Join, this=self._parse_table()) 3566 3567 index = self._index 3568 method, side, kind = self._parse_join_parts() 3569 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3570 join = self._match(TokenType.JOIN) or (kind and 
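# Illustrative example: joins produced by this method are attached to the
# enclosing SELECT by _parse_query_modifiers, so they can be read back from
# the "joins" arg.
# >>> import sqlglot
# >>> sqlglot.parse_one("SELECT * FROM a LEFT JOIN b ON a.id = b.id").args["joins"]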
kind.token_type == TokenType.STRAIGHT_JOIN) 3571 3572 if not skip_join_token and not join: 3573 self._retreat(index) 3574 kind = None 3575 method = None 3576 side = None 3577 3578 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3579 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3580 3581 if not skip_join_token and not join and not outer_apply and not cross_apply: 3582 return None 3583 3584 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3585 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3586 kwargs["expressions"] = self._parse_csv( 3587 lambda: self._parse_table(parse_bracket=parse_bracket) 3588 ) 3589 3590 if method: 3591 kwargs["method"] = method.text 3592 if side: 3593 kwargs["side"] = side.text 3594 if kind: 3595 kwargs["kind"] = kind.text 3596 if hint: 3597 kwargs["hint"] = hint 3598 3599 if self._match(TokenType.MATCH_CONDITION): 3600 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3601 3602 if self._match(TokenType.ON): 3603 kwargs["on"] = self._parse_assignment() 3604 elif self._match(TokenType.USING): 3605 kwargs["using"] = self._parse_using_identifiers() 3606 elif ( 3607 not (outer_apply or cross_apply) 3608 and not isinstance(kwargs["this"], exp.Unnest) 3609 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3610 ): 3611 index = self._index 3612 joins: t.Optional[list] = list(self._parse_joins()) 3613 3614 if joins and self._match(TokenType.ON): 3615 kwargs["on"] = self._parse_assignment() 3616 elif joins and self._match(TokenType.USING): 3617 kwargs["using"] = self._parse_using_identifiers() 3618 else: 3619 joins = None 3620 self._retreat(index) 3621 3622 kwargs["this"].set("joins", joins if joins else None) 3623 3624 comments = [c for token in (method, side, kind) if token for c in token.comments] 3625 return self.expression(exp.Join, comments=comments, **kwargs) 3626 3627 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3628 this = self._parse_assignment() 3629 3630 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3631 return this 3632 3633 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3634 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3635 3636 return this 3637 3638 def _parse_index_params(self) -> exp.IndexParameters: 3639 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3640 3641 if self._match(TokenType.L_PAREN, advance=False): 3642 columns = self._parse_wrapped_csv(self._parse_with_operator) 3643 else: 3644 columns = None 3645 3646 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3647 partition_by = self._parse_partition_by() 3648 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3649 tablespace = ( 3650 self._parse_var(any_token=True) 3651 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3652 else None 3653 ) 3654 where = self._parse_where() 3655 3656 on = self._parse_field() if self._match(TokenType.ON) else None 3657 3658 return self.expression( 3659 exp.IndexParameters, 3660 using=using, 3661 columns=columns, 3662 include=include, 3663 partition_by=partition_by, 3664 where=where, 3665 with_storage=with_storage, 3666 tablespace=tablespace, 3667 on=on, 3668 ) 3669 3670 def _parse_index( 3671 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3672 ) -> t.Optional[exp.Index]: 3673 if index or 
anonymous: 3674 unique = None 3675 primary = None 3676 amp = None 3677 3678 self._match(TokenType.ON) 3679 self._match(TokenType.TABLE) # hive 3680 table = self._parse_table_parts(schema=True) 3681 else: 3682 unique = self._match(TokenType.UNIQUE) 3683 primary = self._match_text_seq("PRIMARY") 3684 amp = self._match_text_seq("AMP") 3685 3686 if not self._match(TokenType.INDEX): 3687 return None 3688 3689 index = self._parse_id_var() 3690 table = None 3691 3692 params = self._parse_index_params() 3693 3694 return self.expression( 3695 exp.Index, 3696 this=index, 3697 table=table, 3698 unique=unique, 3699 primary=primary, 3700 amp=amp, 3701 params=params, 3702 ) 3703 3704 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3705 hints: t.List[exp.Expression] = [] 3706 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3707 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3708 hints.append( 3709 self.expression( 3710 exp.WithTableHint, 3711 expressions=self._parse_csv( 3712 lambda: self._parse_function() or self._parse_var(any_token=True) 3713 ), 3714 ) 3715 ) 3716 self._match_r_paren() 3717 else: 3718 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3719 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3720 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3721 3722 self._match_set((TokenType.INDEX, TokenType.KEY)) 3723 if self._match(TokenType.FOR): 3724 hint.set("target", self._advance_any() and self._prev.text.upper()) 3725 3726 hint.set("expressions", self._parse_wrapped_id_vars()) 3727 hints.append(hint) 3728 3729 return hints or None 3730 3731 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3732 return ( 3733 (not schema and self._parse_function(optional_parens=False)) 3734 or self._parse_id_var(any_token=False) 3735 or self._parse_string_as_identifier() 3736 or self._parse_placeholder() 3737 ) 3738 3739 def _parse_table_parts( 3740 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3741 ) -> exp.Table: 3742 catalog = None 3743 db = None 3744 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3745 3746 while self._match(TokenType.DOT): 3747 if catalog: 3748 # This allows nesting the table in arbitrarily many dot expressions if needed 3749 table = self.expression( 3750 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3751 ) 3752 else: 3753 catalog = db 3754 db = table 3755 # "" used for tsql FROM a..b case 3756 table = self._parse_table_part(schema=schema) or "" 3757 3758 if ( 3759 wildcard 3760 and self._is_connected() 3761 and (isinstance(table, exp.Identifier) or not table) 3762 and self._match(TokenType.STAR) 3763 ): 3764 if isinstance(table, exp.Identifier): 3765 table.args["this"] += "*" 3766 else: 3767 table = exp.Identifier(this="*") 3768 3769 # We bubble up comments from the Identifier to the Table 3770 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3771 3772 if is_db_reference: 3773 catalog = db 3774 db = table 3775 table = None 3776 3777 if not table and not is_db_reference: 3778 self.raise_error(f"Expected table name but got {self._curr}") 3779 if not db and is_db_reference: 3780 self.raise_error(f"Expected database name but got {self._curr}") 3781 3782 table = self.expression( 3783 exp.Table, 3784 comments=comments, 3785 this=table, 3786 db=db, 3787 catalog=catalog, 3788 ) 3789 3790 changes = self._parse_changes() 3791 if changes: 3792 
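# Illustrative example for _parse_table_parts: dotted references are split
# into catalog, db and table name.
# >>> import sqlglot
# >>> tbl = sqlglot.parse_one("SELECT * FROM c.d.t").find(sqlglot.exp.Table)
# >>> (tbl.catalog, tbl.db, tbl.name)
# ('c', 'd', 't')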
table.set("changes", changes) 3793 3794 at_before = self._parse_historical_data() 3795 if at_before: 3796 table.set("when", at_before) 3797 3798 pivots = self._parse_pivots() 3799 if pivots: 3800 table.set("pivots", pivots) 3801 3802 return table 3803 3804 def _parse_table( 3805 self, 3806 schema: bool = False, 3807 joins: bool = False, 3808 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3809 parse_bracket: bool = False, 3810 is_db_reference: bool = False, 3811 parse_partition: bool = False, 3812 ) -> t.Optional[exp.Expression]: 3813 lateral = self._parse_lateral() 3814 if lateral: 3815 return lateral 3816 3817 unnest = self._parse_unnest() 3818 if unnest: 3819 return unnest 3820 3821 values = self._parse_derived_table_values() 3822 if values: 3823 return values 3824 3825 subquery = self._parse_select(table=True) 3826 if subquery: 3827 if not subquery.args.get("pivots"): 3828 subquery.set("pivots", self._parse_pivots()) 3829 return subquery 3830 3831 bracket = parse_bracket and self._parse_bracket(None) 3832 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3833 3834 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3835 self._parse_table 3836 ) 3837 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3838 3839 only = self._match(TokenType.ONLY) 3840 3841 this = t.cast( 3842 exp.Expression, 3843 bracket 3844 or rows_from 3845 or self._parse_bracket( 3846 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3847 ), 3848 ) 3849 3850 if only: 3851 this.set("only", only) 3852 3853 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3854 self._match_text_seq("*") 3855 3856 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3857 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3858 this.set("partition", self._parse_partition()) 3859 3860 if schema: 3861 return self._parse_schema(this=this) 3862 3863 version = self._parse_version() 3864 3865 if version: 3866 this.set("version", version) 3867 3868 if self.dialect.ALIAS_POST_TABLESAMPLE: 3869 this.set("sample", self._parse_table_sample()) 3870 3871 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3872 if alias: 3873 this.set("alias", alias) 3874 3875 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3876 return self.expression( 3877 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3878 ) 3879 3880 this.set("hints", self._parse_table_hints()) 3881 3882 if not this.args.get("pivots"): 3883 this.set("pivots", self._parse_pivots()) 3884 3885 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3886 this.set("sample", self._parse_table_sample()) 3887 3888 if joins: 3889 for join in self._parse_joins(): 3890 this.append("joins", join) 3891 3892 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3893 this.set("ordinality", True) 3894 this.set("alias", self._parse_table_alias()) 3895 3896 return this 3897 3898 def _parse_version(self) -> t.Optional[exp.Version]: 3899 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3900 this = "TIMESTAMP" 3901 elif self._match(TokenType.VERSION_SNAPSHOT): 3902 this = "VERSION" 3903 else: 3904 return None 3905 3906 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3907 kind = self._prev.text.upper() 3908 start = self._parse_bitwise() 3909 self._match_texts(("TO", "AND")) 3910 end = self._parse_bitwise() 3911 expression: t.Optional[exp.Expression] = 
self.expression( 3912 exp.Tuple, expressions=[start, end] 3913 ) 3914 elif self._match_text_seq("CONTAINED", "IN"): 3915 kind = "CONTAINED IN" 3916 expression = self.expression( 3917 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3918 ) 3919 elif self._match(TokenType.ALL): 3920 kind = "ALL" 3921 expression = None 3922 else: 3923 self._match_text_seq("AS", "OF") 3924 kind = "AS OF" 3925 expression = self._parse_type() 3926 3927 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3928 3929 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3930 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3931 index = self._index 3932 historical_data = None 3933 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3934 this = self._prev.text.upper() 3935 kind = ( 3936 self._match(TokenType.L_PAREN) 3937 and self._match_texts(self.HISTORICAL_DATA_KIND) 3938 and self._prev.text.upper() 3939 ) 3940 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3941 3942 if expression: 3943 self._match_r_paren() 3944 historical_data = self.expression( 3945 exp.HistoricalData, this=this, kind=kind, expression=expression 3946 ) 3947 else: 3948 self._retreat(index) 3949 3950 return historical_data 3951 3952 def _parse_changes(self) -> t.Optional[exp.Changes]: 3953 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 3954 return None 3955 3956 information = self._parse_var(any_token=True) 3957 self._match_r_paren() 3958 3959 return self.expression( 3960 exp.Changes, 3961 information=information, 3962 at_before=self._parse_historical_data(), 3963 end=self._parse_historical_data(), 3964 ) 3965 3966 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3967 if not self._match(TokenType.UNNEST): 3968 return None 3969 3970 expressions = self._parse_wrapped_csv(self._parse_equality) 3971 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3972 3973 alias = self._parse_table_alias() if with_alias else None 3974 3975 if alias: 3976 if self.dialect.UNNEST_COLUMN_ONLY: 3977 if alias.args.get("columns"): 3978 self.raise_error("Unexpected extra column alias in unnest.") 3979 3980 alias.set("columns", [alias.this]) 3981 alias.set("this", None) 3982 3983 columns = alias.args.get("columns") or [] 3984 if offset and len(expressions) < len(columns): 3985 offset = columns.pop() 3986 3987 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3988 self._match(TokenType.ALIAS) 3989 offset = self._parse_id_var( 3990 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3991 ) or exp.to_identifier("offset") 3992 3993 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3994 3995 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3996 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3997 if not is_derived and not ( 3998 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 3999 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4000 ): 4001 return None 4002 4003 expressions = self._parse_csv(self._parse_value) 4004 alias = self._parse_table_alias() 4005 4006 if is_derived: 4007 self._match_r_paren() 4008 4009 return self.expression( 4010 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4011 ) 4012 4013 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4014 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4015 as_modifier 
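# Illustrative example: the as_modifier path below corresponds to DuckDB's
# USING SAMPLE syntax. The table name is invented.
# >>> import sqlglot
# >>> sqlglot.parse_one("SELECT * FROM t USING SAMPLE 10%", read="duckdb").find(sqlglot.exp.TableSample)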
and self._match_text_seq("USING", "SAMPLE")
4016        ):
4017            return None
4018
4019        bucket_numerator = None
4020        bucket_denominator = None
4021        bucket_field = None
4022        percent = None
4023        size = None
4024        seed = None
4025
4026        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
4027        matched_l_paren = self._match(TokenType.L_PAREN)
4028
4029        if self.TABLESAMPLE_CSV:
4030            num = None
4031            expressions = self._parse_csv(self._parse_primary)
4032        else:
4033            expressions = None
4034            num = (
4035                self._parse_factor()
4036                if self._match(TokenType.NUMBER, advance=False)
4037                else self._parse_primary() or self._parse_placeholder()
4038            )
4039
4040        if self._match_text_seq("BUCKET"):
4041            bucket_numerator = self._parse_number()
4042            self._match_text_seq("OUT", "OF")
4043            bucket_denominator = self._parse_number()
4044            self._match(TokenType.ON)
4045            bucket_field = self._parse_field()
4046        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
4047            percent = num
4048        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
4049            size = num
4050        else:
4051            percent = num
4052
4053        if matched_l_paren:
4054            self._match_r_paren()
4055
4056        if self._match(TokenType.L_PAREN):
4057            method = self._parse_var(upper=True)
4058            seed = self._match(TokenType.COMMA) and self._parse_number()
4059            self._match_r_paren()
4060        elif self._match_texts(("SEED", "REPEATABLE")):
4061            seed = self._parse_wrapped(self._parse_number)
4062
4063        if not method and self.DEFAULT_SAMPLING_METHOD:
4064            method = exp.var(self.DEFAULT_SAMPLING_METHOD)
4065
4066        return self.expression(
4067            exp.TableSample,
4068            expressions=expressions,
4069            method=method,
4070            bucket_numerator=bucket_numerator,
4071            bucket_denominator=bucket_denominator,
4072            bucket_field=bucket_field,
4073            percent=percent,
4074            size=size,
4075            seed=seed,
4076        )
4077
4078    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
4079        return list(iter(self._parse_pivot, None)) or None
4080
4081    def _parse_joins(self) -> t.Iterator[exp.Join]:
4082        return iter(self._parse_join, None)
4083
4084    def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]:
4085        if not self._match(TokenType.INTO):
4086            return None
4087
4088        return self.expression(
4089            exp.UnpivotColumns,
4090            this=self._match_text_seq("NAME") and self._parse_column(),
4091            expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column),
4092        )
4093
4094    # https://duckdb.org/docs/sql/statements/pivot
4095    def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot:
4096        def _parse_on() -> t.Optional[exp.Expression]:
4097            this = self._parse_bitwise()
4098
4099            if self._match(TokenType.IN):
4100                # PIVOT ... ON col IN (row_val1, row_val2)
4101                return self._parse_in(this)
4102            if self._match(TokenType.ALIAS, advance=False):
4103                # UNPIVOT ...
ON (col1, col2, col3) AS row_val 4104 return self._parse_alias(this) 4105 4106 return this 4107 4108 this = self._parse_table() 4109 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4110 into = self._parse_unpivot_columns() 4111 using = self._match(TokenType.USING) and self._parse_csv( 4112 lambda: self._parse_alias(self._parse_function()) 4113 ) 4114 group = self._parse_group() 4115 4116 return self.expression( 4117 exp.Pivot, 4118 this=this, 4119 expressions=expressions, 4120 using=using, 4121 group=group, 4122 unpivot=is_unpivot, 4123 into=into, 4124 ) 4125 4126 def _parse_pivot_in(self) -> exp.In | exp.PivotAny: 4127 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4128 this = self._parse_select_or_expression() 4129 4130 self._match(TokenType.ALIAS) 4131 alias = self._parse_bitwise() 4132 if alias: 4133 if isinstance(alias, exp.Column) and not alias.db: 4134 alias = alias.this 4135 return self.expression(exp.PivotAlias, this=this, alias=alias) 4136 4137 return this 4138 4139 value = self._parse_column() 4140 4141 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4142 self.raise_error("Expecting IN (") 4143 4144 if self._match(TokenType.ANY): 4145 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4146 else: 4147 exprs = self._parse_csv(_parse_aliased_expression) 4148 4149 self._match_r_paren() 4150 return self.expression(exp.In, this=value, expressions=exprs) 4151 4152 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4153 index = self._index 4154 include_nulls = None 4155 4156 if self._match(TokenType.PIVOT): 4157 unpivot = False 4158 elif self._match(TokenType.UNPIVOT): 4159 unpivot = True 4160 4161 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4162 if self._match_text_seq("INCLUDE", "NULLS"): 4163 include_nulls = True 4164 elif self._match_text_seq("EXCLUDE", "NULLS"): 4165 include_nulls = False 4166 else: 4167 return None 4168 4169 expressions = [] 4170 4171 if not self._match(TokenType.L_PAREN): 4172 self._retreat(index) 4173 return None 4174 4175 if unpivot: 4176 expressions = self._parse_csv(self._parse_column) 4177 else: 4178 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 4179 4180 if not expressions: 4181 self.raise_error("Failed to parse PIVOT's aggregation list") 4182 4183 if not self._match(TokenType.FOR): 4184 self.raise_error("Expecting FOR") 4185 4186 field = self._parse_pivot_in() 4187 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4188 self._parse_bitwise 4189 ) 4190 4191 self._match_r_paren() 4192 4193 pivot = self.expression( 4194 exp.Pivot, 4195 expressions=expressions, 4196 field=field, 4197 unpivot=unpivot, 4198 include_nulls=include_nulls, 4199 default_on_null=default_on_null, 4200 ) 4201 4202 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4203 pivot.set("alias", self._parse_table_alias()) 4204 4205 if not unpivot: 4206 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4207 4208 columns: t.List[exp.Expression] = [] 4209 for fld in pivot.args["field"].expressions: 4210 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4211 for name in names: 4212 if self.PREFIXED_PIVOT_COLUMNS: 4213 name = f"{name}_{field_name}" if name else field_name 4214 else: 4215 name = f"{field_name}_{name}" if name else field_name 4216 4217 columns.append(exp.to_identifier(name)) 4218 4219 
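# Illustrative example: a Snowflake-style PIVOT; the loop above derives the
# generated wide-column names that are attached just below. Names are invented.
# >>> import sqlglot
# >>> sql = "SELECT * FROM t PIVOT (SUM(v) FOR k IN ('a', 'b'))"
# >>> sqlglot.parse_one(sql, read="snowflake").find(sqlglot.exp.Pivot)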
pivot.set("columns", columns) 4220 4221 return pivot 4222 4223 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4224 return [agg.alias for agg in aggregations] 4225 4226 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4227 if not skip_where_token and not self._match(TokenType.PREWHERE): 4228 return None 4229 4230 return self.expression( 4231 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4232 ) 4233 4234 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4235 if not skip_where_token and not self._match(TokenType.WHERE): 4236 return None 4237 4238 return self.expression( 4239 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4240 ) 4241 4242 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4243 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4244 return None 4245 4246 elements: t.Dict[str, t.Any] = defaultdict(list) 4247 4248 if self._match(TokenType.ALL): 4249 elements["all"] = True 4250 elif self._match(TokenType.DISTINCT): 4251 elements["all"] = False 4252 4253 while True: 4254 index = self._index 4255 4256 elements["expressions"].extend( 4257 self._parse_csv( 4258 lambda: None 4259 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4260 else self._parse_assignment() 4261 ) 4262 ) 4263 4264 before_with_index = self._index 4265 with_prefix = self._match(TokenType.WITH) 4266 4267 if self._match(TokenType.ROLLUP): 4268 elements["rollup"].append( 4269 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4270 ) 4271 elif self._match(TokenType.CUBE): 4272 elements["cube"].append( 4273 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4274 ) 4275 elif self._match(TokenType.GROUPING_SETS): 4276 elements["grouping_sets"].append( 4277 self.expression( 4278 exp.GroupingSets, 4279 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4280 ) 4281 ) 4282 elif self._match_text_seq("TOTALS"): 4283 elements["totals"] = True # type: ignore 4284 4285 if before_with_index <= self._index <= before_with_index + 1: 4286 self._retreat(before_with_index) 4287 break 4288 4289 if index == self._index: 4290 break 4291 4292 return self.expression(exp.Group, **elements) # type: ignore 4293 4294 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4295 return self.expression( 4296 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4297 ) 4298 4299 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4300 if self._match(TokenType.L_PAREN): 4301 grouping_set = self._parse_csv(self._parse_column) 4302 self._match_r_paren() 4303 return self.expression(exp.Tuple, expressions=grouping_set) 4304 4305 return self._parse_column() 4306 4307 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4308 if not skip_having_token and not self._match(TokenType.HAVING): 4309 return None 4310 return self.expression(exp.Having, this=self._parse_assignment()) 4311 4312 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4313 if not self._match(TokenType.QUALIFY): 4314 return None 4315 return self.expression(exp.Qualify, this=self._parse_assignment()) 4316 4317 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4318 if skip_start_token: 4319 start = None 4320 elif self._match(TokenType.START_WITH): 4321 start = self._parse_assignment() 4322 else: 4323 
return None 4324 4325 self._match(TokenType.CONNECT_BY) 4326 nocycle = self._match_text_seq("NOCYCLE") 4327 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4328 exp.Prior, this=self._parse_bitwise() 4329 ) 4330 connect = self._parse_assignment() 4331 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4332 4333 if not start and self._match(TokenType.START_WITH): 4334 start = self._parse_assignment() 4335 4336 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4337 4338 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4339 this = self._parse_id_var(any_token=True) 4340 if self._match(TokenType.ALIAS): 4341 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4342 return this 4343 4344 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4345 if self._match_text_seq("INTERPOLATE"): 4346 return self._parse_wrapped_csv(self._parse_name_as_expression) 4347 return None 4348 4349 def _parse_order( 4350 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4351 ) -> t.Optional[exp.Expression]: 4352 siblings = None 4353 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4354 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4355 return this 4356 4357 siblings = True 4358 4359 return self.expression( 4360 exp.Order, 4361 this=this, 4362 expressions=self._parse_csv(self._parse_ordered), 4363 siblings=siblings, 4364 ) 4365 4366 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4367 if not self._match(token): 4368 return None 4369 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4370 4371 def _parse_ordered( 4372 self, parse_method: t.Optional[t.Callable] = None 4373 ) -> t.Optional[exp.Ordered]: 4374 this = parse_method() if parse_method else self._parse_assignment() 4375 if not this: 4376 return None 4377 4378 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4379 this = exp.var("ALL") 4380 4381 asc = self._match(TokenType.ASC) 4382 desc = self._match(TokenType.DESC) or (asc and False) 4383 4384 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4385 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4386 4387 nulls_first = is_nulls_first or False 4388 explicitly_null_ordered = is_nulls_first or is_nulls_last 4389 4390 if ( 4391 not explicitly_null_ordered 4392 and ( 4393 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4394 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4395 ) 4396 and self.dialect.NULL_ORDERING != "nulls_are_last" 4397 ): 4398 nulls_first = True 4399 4400 if self._match_text_seq("WITH", "FILL"): 4401 with_fill = self.expression( 4402 exp.WithFill, 4403 **{ # type: ignore 4404 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4405 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4406 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4407 "interpolate": self._parse_interpolate(), 4408 }, 4409 ) 4410 else: 4411 with_fill = None 4412 4413 return self.expression( 4414 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4415 ) 4416 4417 def _parse_limit( 4418 self, 4419 this: t.Optional[exp.Expression] = None, 4420 top: bool = False, 4421 skip_limit_token: bool = False, 4422 ) -> t.Optional[exp.Expression]: 4423 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4424 comments = self._prev_comments 4425 if top: 4426 limit_paren = 
self._match(TokenType.L_PAREN) 4427 expression = self._parse_term() if limit_paren else self._parse_number() 4428 4429 if limit_paren: 4430 self._match_r_paren() 4431 else: 4432 expression = self._parse_term() 4433 4434 if self._match(TokenType.COMMA): 4435 offset = expression 4436 expression = self._parse_term() 4437 else: 4438 offset = None 4439 4440 limit_exp = self.expression( 4441 exp.Limit, 4442 this=this, 4443 expression=expression, 4444 offset=offset, 4445 comments=comments, 4446 expressions=self._parse_limit_by(), 4447 ) 4448 4449 return limit_exp 4450 4451 if self._match(TokenType.FETCH): 4452 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4453 direction = self._prev.text.upper() if direction else "FIRST" 4454 4455 count = self._parse_field(tokens=self.FETCH_TOKENS) 4456 percent = self._match(TokenType.PERCENT) 4457 4458 self._match_set((TokenType.ROW, TokenType.ROWS)) 4459 4460 only = self._match_text_seq("ONLY") 4461 with_ties = self._match_text_seq("WITH", "TIES") 4462 4463 if only and with_ties: 4464 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 4465 4466 return self.expression( 4467 exp.Fetch, 4468 direction=direction, 4469 count=count, 4470 percent=percent, 4471 with_ties=with_ties, 4472 ) 4473 4474 return this 4475 4476 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4477 if not self._match(TokenType.OFFSET): 4478 return this 4479 4480 count = self._parse_term() 4481 self._match_set((TokenType.ROW, TokenType.ROWS)) 4482 4483 return self.expression( 4484 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4485 ) 4486 4487 def _can_parse_limit_or_offset(self) -> bool: 4488 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4489 return False 4490 4491 index = self._index 4492 result = bool( 4493 self._try_parse(self._parse_limit, retreat=True) 4494 or self._try_parse(self._parse_offset, retreat=True) 4495 ) 4496 self._retreat(index) 4497 return result 4498 4499 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4500 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4501 4502 def _parse_locks(self) -> t.List[exp.Lock]: 4503 locks = [] 4504 while True: 4505 if self._match_text_seq("FOR", "UPDATE"): 4506 update = True 4507 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4508 "LOCK", "IN", "SHARE", "MODE" 4509 ): 4510 update = False 4511 else: 4512 break 4513 4514 expressions = None 4515 if self._match_text_seq("OF"): 4516 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4517 4518 wait: t.Optional[bool | exp.Expression] = None 4519 if self._match_text_seq("NOWAIT"): 4520 wait = True 4521 elif self._match_text_seq("WAIT"): 4522 wait = self._parse_primary() 4523 elif self._match_text_seq("SKIP", "LOCKED"): 4524 wait = False 4525 4526 locks.append( 4527 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4528 ) 4529 4530 return locks 4531 4532 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4533 while this and self._match_set(self.SET_OPERATIONS): 4534 token_type = self._prev.token_type 4535 4536 if token_type == TokenType.UNION: 4537 operation: t.Type[exp.SetOperation] = exp.Union 4538 elif token_type == TokenType.EXCEPT: 4539 operation = exp.Except 4540 else: 4541 operation = exp.Intersect 4542 4543 comments = self._prev.comments 4544 4545 if self._match(TokenType.DISTINCT): 4546 distinct: 
t.Optional[bool] = True 4547 elif self._match(TokenType.ALL): 4548 distinct = False 4549 else: 4550 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4551 if distinct is None: 4552 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4553 4554 by_name = self._match_text_seq("BY", "NAME") 4555 expression = self._parse_select(nested=True, parse_set_operation=False) 4556 4557 this = self.expression( 4558 operation, 4559 comments=comments, 4560 this=this, 4561 distinct=distinct, 4562 by_name=by_name, 4563 expression=expression, 4564 ) 4565 4566 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4567 expression = this.expression 4568 4569 if expression: 4570 for arg in self.SET_OP_MODIFIERS: 4571 expr = expression.args.get(arg) 4572 if expr: 4573 this.set(arg, expr.pop()) 4574 4575 return this 4576 4577 def _parse_expression(self) -> t.Optional[exp.Expression]: 4578 return self._parse_alias(self._parse_assignment()) 4579 4580 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4581 this = self._parse_disjunction() 4582 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4583 # This allows us to parse <non-identifier token> := <expr> 4584 this = exp.column( 4585 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4586 ) 4587 4588 while self._match_set(self.ASSIGNMENT): 4589 if isinstance(this, exp.Column) and len(this.parts) == 1: 4590 this = this.this 4591 4592 this = self.expression( 4593 self.ASSIGNMENT[self._prev.token_type], 4594 this=this, 4595 comments=self._prev_comments, 4596 expression=self._parse_assignment(), 4597 ) 4598 4599 return this 4600 4601 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4602 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4603 4604 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4605 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4606 4607 def _parse_equality(self) -> t.Optional[exp.Expression]: 4608 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4609 4610 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4611 return self._parse_tokens(self._parse_range, self.COMPARISON) 4612 4613 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4614 this = this or self._parse_bitwise() 4615 negate = self._match(TokenType.NOT) 4616 4617 if self._match_set(self.RANGE_PARSERS): 4618 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4619 if not expression: 4620 return this 4621 4622 this = expression 4623 elif self._match(TokenType.ISNULL): 4624 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4625 4626 # Postgres supports ISNULL and NOTNULL for conditions. 
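# For instance (a hedged, doctest-style sketch assuming the public sqlglot
# API; outputs are indicative and may vary slightly across versions):
#
#     >>> import sqlglot
#     >>> sqlglot.parse_one("SELECT x ISNULL", read="postgres").sql()
#     'SELECT x IS NULL'
#     >>> sqlglot.parse_one("SELECT x NOTNULL", read="postgres").sql()
#     'SELECT NOT x IS NULL'
#
# The reference below discusses the underlying Postgres behaviour: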
4627 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4628 if self._match(TokenType.NOTNULL): 4629 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4630 this = self.expression(exp.Not, this=this) 4631 4632 if negate: 4633 this = self._negate_range(this) 4634 4635 if self._match(TokenType.IS): 4636 this = self._parse_is(this) 4637 4638 return this 4639 4640 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4641 if not this: 4642 return this 4643 4644 return self.expression(exp.Not, this=this) 4645 4646 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4647 index = self._index - 1 4648 negate = self._match(TokenType.NOT) 4649 4650 if self._match_text_seq("DISTINCT", "FROM"): 4651 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4652 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4653 4654 if self._match(TokenType.JSON): 4655 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4656 4657 if self._match_text_seq("WITH"): 4658 _with = True 4659 elif self._match_text_seq("WITHOUT"): 4660 _with = False 4661 else: 4662 _with = None 4663 4664 unique = self._match(TokenType.UNIQUE) 4665 self._match_text_seq("KEYS") 4666 expression: t.Optional[exp.Expression] = self.expression( 4667 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4668 ) 4669 else: 4670 expression = self._parse_primary() or self._parse_null() 4671 if not expression: 4672 self._retreat(index) 4673 return None 4674 4675 this = self.expression(exp.Is, this=this, expression=expression) 4676 return self.expression(exp.Not, this=this) if negate else this 4677 4678 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4679 unnest = self._parse_unnest(with_alias=False) 4680 if unnest: 4681 this = self.expression(exp.In, this=this, unnest=unnest) 4682 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4683 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4684 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4685 4686 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4687 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4688 else: 4689 this = self.expression(exp.In, this=this, expressions=expressions) 4690 4691 if matched_l_paren: 4692 self._match_r_paren(this) 4693 elif not self._match(TokenType.R_BRACKET, expression=this): 4694 self.raise_error("Expecting ]") 4695 else: 4696 this = self.expression(exp.In, this=this, field=self._parse_column()) 4697 4698 return this 4699 4700 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4701 low = self._parse_bitwise() 4702 self._match(TokenType.AND) 4703 high = self._parse_bitwise() 4704 return self.expression(exp.Between, this=this, low=low, high=high) 4705 4706 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4707 if not self._match(TokenType.ESCAPE): 4708 return this 4709 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4710 4711 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4712 index = self._index 4713 4714 if not self._match(TokenType.INTERVAL) and match_interval: 4715 return None 4716 4717 if self._match(TokenType.STRING, advance=False): 4718 this = self._parse_primary() 4719 else: 4720 this = self._parse_term() 4721 4722 if not 
this or ( 4723 isinstance(this, exp.Column) 4724 and not this.table 4725 and not this.this.quoted 4726 and this.name.upper() == "IS" 4727 ): 4728 self._retreat(index) 4729 return None 4730 4731 unit = self._parse_function() or ( 4732 not self._match(TokenType.ALIAS, advance=False) 4733 and self._parse_var(any_token=True, upper=True) 4734 ) 4735 4736 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4737 # each INTERVAL expression into this canonical form so it's easy to transpile 4738 if this and this.is_number: 4739 this = exp.Literal.string(this.to_py()) 4740 elif this and this.is_string: 4741 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4742 if parts and unit: 4743 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4744 unit = None 4745 self._retreat(self._index - 1) 4746 4747 if len(parts) == 1: 4748 this = exp.Literal.string(parts[0][0]) 4749 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4750 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4751 unit = self.expression( 4752 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4753 ) 4754 4755 interval = self.expression(exp.Interval, this=this, unit=unit) 4756 4757 index = self._index 4758 self._match(TokenType.PLUS) 4759 4760 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4761 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4762 return self.expression( 4763 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4764 ) 4765 4766 self._retreat(index) 4767 return interval 4768 4769 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4770 this = self._parse_term() 4771 4772 while True: 4773 if self._match_set(self.BITWISE): 4774 this = self.expression( 4775 self.BITWISE[self._prev.token_type], 4776 this=this, 4777 expression=self._parse_term(), 4778 ) 4779 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4780 this = self.expression( 4781 exp.DPipe, 4782 this=this, 4783 expression=self._parse_term(), 4784 safe=not self.dialect.STRICT_STRING_CONCAT, 4785 ) 4786 elif self._match(TokenType.DQMARK): 4787 this = self.expression( 4788 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4789 ) 4790 elif self._match_pair(TokenType.LT, TokenType.LT): 4791 this = self.expression( 4792 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4793 ) 4794 elif self._match_pair(TokenType.GT, TokenType.GT): 4795 this = self.expression( 4796 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4797 ) 4798 else: 4799 break 4800 4801 return this 4802 4803 def _parse_term(self) -> t.Optional[exp.Expression]: 4804 this = self._parse_factor() 4805 4806 while self._match_set(self.TERM): 4807 klass = self.TERM[self._prev.token_type] 4808 comments = self._prev_comments 4809 expression = self._parse_factor() 4810 4811 this = self.expression(klass, this=this, comments=comments, expression=expression) 4812 4813 if isinstance(this, exp.Collate): 4814 expr = this.expression 4815 4816 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 4817 # fallback to Identifier / Var 4818 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4819 ident = expr.this 4820 if isinstance(ident, exp.Identifier): 4821 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4822 4823 return this 4824 4825 def _parse_factor(self) -> t.Optional[exp.Expression]: 4826 parse_method = 
self._parse_exponent if self.EXPONENT else self._parse_unary
4827 this = parse_method()
4828 
4829 while self._match_set(self.FACTOR):
4830 klass = self.FACTOR[self._prev.token_type]
4831 comments = self._prev_comments
4832 expression = parse_method()
4833 
4834 if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
4835 self._retreat(self._index - 1)
4836 return this
4837 
4838 this = self.expression(klass, this=this, comments=comments, expression=expression)
4839 
4840 if isinstance(this, exp.Div):
4841 this.args["typed"] = self.dialect.TYPED_DIVISION
4842 this.args["safe"] = self.dialect.SAFE_DIVISION
4843 
4844 return this
4845 
4846 def _parse_exponent(self) -> t.Optional[exp.Expression]:
4847 return self._parse_tokens(self._parse_unary, self.EXPONENT)
4848 
4849 def _parse_unary(self) -> t.Optional[exp.Expression]:
4850 if self._match_set(self.UNARY_PARSERS):
4851 return self.UNARY_PARSERS[self._prev.token_type](self)
4852 return self._parse_at_time_zone(self._parse_type())
4853 
4854 def _parse_type(
4855 self, parse_interval: bool = True, fallback_to_identifier: bool = False
4856 ) -> t.Optional[exp.Expression]:
4857 interval = parse_interval and self._parse_interval()
4858 if interval:
4859 return interval
4860 
4861 index = self._index
4862 data_type = self._parse_types(check_func=True, allow_identifiers=False)
4863 
4864 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
4865 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
4866 if isinstance(data_type, exp.Cast):
4867 # This constructor can contain ops directly after it, for instance struct unnesting:
4868 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
4869 return self._parse_column_ops(data_type)
4870 
4871 if data_type:
4872 index2 = self._index
4873 this = self._parse_primary()
4874 
4875 if isinstance(this, exp.Literal):
4876 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
4877 if parser:
4878 return parser(self, this, data_type)
4879 
4880 return self.expression(exp.Cast, this=this, to=data_type)
4881 
4882 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
4883 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
4884 #
4885 # If the index difference here is greater than 1, that means the parser itself must have
4886 # consumed additional tokens such as the DECIMAL scale and precision in the above example.
4887 #
4888 # If it's not greater than 1, then it must be 1, because we've consumed at least the type
4889 # keyword, meaning that the expressions arg of the DataType must have gotten set by a
4890 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
4891 # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
4892 #
4893 # In these cases, we don't really want to return the converted type, but instead retreat
4894 # and try to parse a Column or Identifier in the section below.
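# A hedged, doctest-style sketch of both outcomes (assumes the public
# sqlglot API; exact output may vary by version): a literal right after a
# type keyword is canonicalized to a cast, while a lone type keyword
# retreats and is re-parsed as a plain column reference.
#
#     >>> import sqlglot
#     >>> sqlglot.parse_one("SELECT DATE '2020-01-01'").sql()
#     "SELECT CAST('2020-01-01' AS DATE)"
#     >>> sqlglot.parse_one("SELECT decimal FROM t").sql()
#     'SELECT decimal FROM t'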
4895 if data_type.expressions and index2 - index > 1: 4896 self._retreat(index2) 4897 return self._parse_column_ops(data_type) 4898 4899 self._retreat(index) 4900 4901 if fallback_to_identifier: 4902 return self._parse_id_var() 4903 4904 this = self._parse_column() 4905 return this and self._parse_column_ops(this) 4906 4907 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4908 this = self._parse_type() 4909 if not this: 4910 return None 4911 4912 if isinstance(this, exp.Column) and not this.table: 4913 this = exp.var(this.name.upper()) 4914 4915 return self.expression( 4916 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4917 ) 4918 4919 def _parse_types( 4920 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4921 ) -> t.Optional[exp.Expression]: 4922 index = self._index 4923 4924 this: t.Optional[exp.Expression] = None 4925 prefix = self._match_text_seq("SYSUDTLIB", ".") 4926 4927 if not self._match_set(self.TYPE_TOKENS): 4928 identifier = allow_identifiers and self._parse_id_var( 4929 any_token=False, tokens=(TokenType.VAR,) 4930 ) 4931 if isinstance(identifier, exp.Identifier): 4932 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4933 4934 if len(tokens) != 1: 4935 self.raise_error("Unexpected identifier", self._prev) 4936 4937 if tokens[0].token_type in self.TYPE_TOKENS: 4938 self._prev = tokens[0] 4939 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4940 type_name = identifier.name 4941 4942 while self._match(TokenType.DOT): 4943 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4944 4945 this = exp.DataType.build(type_name, udt=True) 4946 else: 4947 self._retreat(self._index - 1) 4948 return None 4949 else: 4950 return None 4951 4952 type_token = self._prev.token_type 4953 4954 if type_token == TokenType.PSEUDO_TYPE: 4955 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4956 4957 if type_token == TokenType.OBJECT_IDENTIFIER: 4958 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4959 4960 # https://materialize.com/docs/sql/types/map/ 4961 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4962 key_type = self._parse_types( 4963 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4964 ) 4965 if not self._match(TokenType.FARROW): 4966 self._retreat(index) 4967 return None 4968 4969 value_type = self._parse_types( 4970 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4971 ) 4972 if not self._match(TokenType.R_BRACKET): 4973 self._retreat(index) 4974 return None 4975 4976 return exp.DataType( 4977 this=exp.DataType.Type.MAP, 4978 expressions=[key_type, value_type], 4979 nested=True, 4980 prefix=prefix, 4981 ) 4982 4983 nested = type_token in self.NESTED_TYPE_TOKENS 4984 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4985 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4986 expressions = None 4987 maybe_func = False 4988 4989 if self._match(TokenType.L_PAREN): 4990 if is_struct: 4991 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4992 elif nested: 4993 expressions = self._parse_csv( 4994 lambda: self._parse_types( 4995 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4996 ) 4997 ) 4998 if type_token == TokenType.NULLABLE and len(expressions) == 1: 4999 this = expressions[0] 5000 this.set("nullable", True) 5001 self._match_r_paren() 5002 return this 5003 elif type_token in self.ENUM_TYPE_TOKENS: 5004 
expressions = self._parse_csv(self._parse_equality) 5005 elif is_aggregate: 5006 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5007 any_token=False, tokens=(TokenType.VAR,) 5008 ) 5009 if not func_or_ident or not self._match(TokenType.COMMA): 5010 return None 5011 expressions = self._parse_csv( 5012 lambda: self._parse_types( 5013 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5014 ) 5015 ) 5016 expressions.insert(0, func_or_ident) 5017 else: 5018 expressions = self._parse_csv(self._parse_type_size) 5019 5020 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5021 if type_token == TokenType.VECTOR and len(expressions) == 2: 5022 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 5023 5024 if not expressions or not self._match(TokenType.R_PAREN): 5025 self._retreat(index) 5026 return None 5027 5028 maybe_func = True 5029 5030 values: t.Optional[t.List[exp.Expression]] = None 5031 5032 if nested and self._match(TokenType.LT): 5033 if is_struct: 5034 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5035 else: 5036 expressions = self._parse_csv( 5037 lambda: self._parse_types( 5038 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5039 ) 5040 ) 5041 5042 if not self._match(TokenType.GT): 5043 self.raise_error("Expecting >") 5044 5045 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5046 values = self._parse_csv(self._parse_assignment) 5047 if not values and is_struct: 5048 values = None 5049 self._retreat(self._index - 1) 5050 else: 5051 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5052 5053 if type_token in self.TIMESTAMPS: 5054 if self._match_text_seq("WITH", "TIME", "ZONE"): 5055 maybe_func = False 5056 tz_type = ( 5057 exp.DataType.Type.TIMETZ 5058 if type_token in self.TIMES 5059 else exp.DataType.Type.TIMESTAMPTZ 5060 ) 5061 this = exp.DataType(this=tz_type, expressions=expressions) 5062 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5063 maybe_func = False 5064 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5065 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5066 maybe_func = False 5067 elif type_token == TokenType.INTERVAL: 5068 unit = self._parse_var(upper=True) 5069 if unit: 5070 if self._match_text_seq("TO"): 5071 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5072 5073 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5074 else: 5075 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5076 5077 if maybe_func and check_func: 5078 index2 = self._index 5079 peek = self._parse_string() 5080 5081 if not peek: 5082 self._retreat(index) 5083 return None 5084 5085 self._retreat(index2) 5086 5087 if not this: 5088 if self._match_text_seq("UNSIGNED"): 5089 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5090 if not unsigned_type_token: 5091 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5092 5093 type_token = unsigned_type_token or type_token 5094 5095 this = exp.DataType( 5096 this=exp.DataType.Type[type_token.value], 5097 expressions=expressions, 5098 nested=nested, 5099 prefix=prefix, 5100 ) 5101 5102 # Empty arrays/structs are allowed 5103 if values is not None: 5104 cls = exp.Struct if is_struct else exp.Array 5105 this = exp.cast(cls(expressions=values), this, copy=False) 5106 5107 elif expressions: 5108 this.set("expressions", 
expressions) 5109 5110 # https://materialize.com/docs/sql/types/list/#type-name 5111 while self._match(TokenType.LIST): 5112 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5113 5114 index = self._index 5115 5116 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5117 matched_array = self._match(TokenType.ARRAY) 5118 5119 while self._curr: 5120 datatype_token = self._prev.token_type 5121 matched_l_bracket = self._match(TokenType.L_BRACKET) 5122 5123 if (not matched_l_bracket and not matched_array) or ( 5124 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5125 ): 5126 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5127 # not to be confused with the fixed size array parsing 5128 break 5129 5130 matched_array = False 5131 values = self._parse_csv(self._parse_assignment) or None 5132 if ( 5133 values 5134 and not schema 5135 and ( 5136 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5137 ) 5138 ): 5139 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5140 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5141 self._retreat(index) 5142 break 5143 5144 this = exp.DataType( 5145 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5146 ) 5147 self._match(TokenType.R_BRACKET) 5148 5149 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5150 converter = self.TYPE_CONVERTERS.get(this.this) 5151 if converter: 5152 this = converter(t.cast(exp.DataType, this)) 5153 5154 return this 5155 5156 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5157 index = self._index 5158 5159 if ( 5160 self._curr 5161 and self._next 5162 and self._curr.token_type in self.TYPE_TOKENS 5163 and self._next.token_type in self.TYPE_TOKENS 5164 ): 5165 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5166 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5167 this = self._parse_id_var() 5168 else: 5169 this = ( 5170 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5171 or self._parse_id_var() 5172 ) 5173 5174 self._match(TokenType.COLON) 5175 5176 if ( 5177 type_required 5178 and not isinstance(this, exp.DataType) 5179 and not self._match_set(self.TYPE_TOKENS, advance=False) 5180 ): 5181 self._retreat(index) 5182 return self._parse_types() 5183 5184 return self._parse_column_def(this) 5185 5186 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5187 if not self._match_text_seq("AT", "TIME", "ZONE"): 5188 return this 5189 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5190 5191 def _parse_column(self) -> t.Optional[exp.Expression]: 5192 this = self._parse_column_reference() 5193 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5194 5195 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5196 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5197 5198 return column 5199 5200 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5201 this = self._parse_field() 5202 if ( 5203 not this 5204 and self._match(TokenType.VALUES, advance=False) 5205 and self.VALUES_FOLLOWED_BY_PAREN 5206 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5207 ): 5208 this = self._parse_id_var() 5209 5210 if isinstance(this, exp.Identifier): 5211 # We bubble up comments from the Identifier to the Column 5212 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5213 5214 return this 5215 5216 def _parse_colon_as_variant_extract( 5217 self, this: t.Optional[exp.Expression] 5218 ) -> t.Optional[exp.Expression]: 5219 casts = [] 5220 json_path = [] 5221 escape = None 5222 5223 while self._match(TokenType.COLON): 5224 start_index = self._index 5225 5226 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5227 path = self._parse_column_ops( 5228 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5229 ) 5230 5231 # The cast :: operator has a lower precedence than the extraction operator :, so 5232 # we rearrange the AST appropriately to avoid casting the JSON path 5233 while isinstance(path, exp.Cast): 5234 casts.append(path.to) 5235 path = path.this 5236 5237 if casts: 5238 dcolon_offset = next( 5239 i 5240 for i, t in enumerate(self._tokens[start_index:]) 5241 if t.token_type == TokenType.DCOLON 5242 ) 5243 end_token = self._tokens[start_index + dcolon_offset - 1] 5244 else: 5245 end_token = self._prev 5246 5247 if path: 5248 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5249 # it'll roundtrip to a string literal in GET_PATH 5250 if isinstance(path, exp.Identifier) and path.quoted: 5251 escape = True 5252 5253 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5254 5255 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5256 # Databricks transforms it back to the colon/dot notation 5257 if json_path: 5258 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5259 5260 if json_path_expr: 5261 json_path_expr.set("escape", escape) 5262 5263 this = self.expression( 5264 exp.JSONExtract, 5265 this=this, 5266 expression=json_path_expr, 5267 variant_extract=True, 5268 ) 5269 5270 while casts: 5271 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5272 5273 return this 5274 5275 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5276 return self._parse_types() 5277 5278 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5279 this = self._parse_bracket(this) 5280 5281 while self._match_set(self.COLUMN_OPERATORS): 5282 op_token = self._prev.token_type 5283 op = self.COLUMN_OPERATORS.get(op_token) 5284 5285 if op_token == TokenType.DCOLON: 5286 field = self._parse_dcolon() 5287 if not field: 5288 self.raise_error("Expected type") 5289 elif op and self._curr: 5290 field = self._parse_column_reference() or self._parse_bracket() 5291 else: 5292 field = self._parse_field(any_token=True, anonymous_func=True) 5293 5294 if isinstance(field, (exp.Func, exp.Window)) and this: 5295 # BQ & snowflake allow function calls like x.y.count(...), SAFE.SUBSTR(...) etc 5296 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5297 this = exp.replace_tree( 5298 this, 5299 lambda n: ( 5300 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5301 if n.table 5302 else n.this 5303 ) 5304 if isinstance(n, exp.Column) 5305 else n, 5306 ) 5307 5308 if op: 5309 this = op(self, this, field) 5310 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5311 this = self.expression( 5312 exp.Column, 5313 comments=this.comments, 5314 this=field, 5315 table=this.this, 5316 db=this.args.get("table"), 5317 catalog=this.args.get("db"), 5318 ) 5319 elif isinstance(field, exp.Window): 5320 # Move the exp.Dot's to the window's function 5321 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5322 field.set("this", window_func) 5323 this = field 5324 else: 5325 this = self.expression(exp.Dot, this=this, expression=field) 5326 5327 if field and field.comments: 5328 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5329 5330 this = self._parse_bracket(this) 5331 5332 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5333 5334 def _parse_primary(self) -> t.Optional[exp.Expression]: 5335 if self._match_set(self.PRIMARY_PARSERS): 5336 token_type = self._prev.token_type 5337 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5338 5339 if token_type == TokenType.STRING: 5340 expressions = [primary] 5341 while self._match(TokenType.STRING): 5342 expressions.append(exp.Literal.string(self._prev.text)) 5343 5344 if len(expressions) > 1: 5345 return self.expression(exp.Concat, expressions=expressions) 5346 5347 return primary 5348 5349 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5350 return exp.Literal.number(f"0.{self._prev.text}") 5351 5352 if 
self._match(TokenType.L_PAREN): 5353 comments = self._prev_comments 5354 query = self._parse_select() 5355 5356 if query: 5357 expressions = [query] 5358 else: 5359 expressions = self._parse_expressions() 5360 5361 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5362 5363 if not this and self._match(TokenType.R_PAREN, advance=False): 5364 this = self.expression(exp.Tuple) 5365 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5366 this = self._parse_subquery(this=this, parse_alias=False) 5367 elif isinstance(this, exp.Subquery): 5368 this = self._parse_subquery( 5369 this=self._parse_set_operations(this), parse_alias=False 5370 ) 5371 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5372 this = self.expression(exp.Tuple, expressions=expressions) 5373 else: 5374 this = self.expression(exp.Paren, this=this) 5375 5376 if this: 5377 this.add_comments(comments) 5378 5379 self._match_r_paren(expression=this) 5380 return this 5381 5382 return None 5383 5384 def _parse_field( 5385 self, 5386 any_token: bool = False, 5387 tokens: t.Optional[t.Collection[TokenType]] = None, 5388 anonymous_func: bool = False, 5389 ) -> t.Optional[exp.Expression]: 5390 if anonymous_func: 5391 field = ( 5392 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5393 or self._parse_primary() 5394 ) 5395 else: 5396 field = self._parse_primary() or self._parse_function( 5397 anonymous=anonymous_func, any_token=any_token 5398 ) 5399 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5400 5401 def _parse_function( 5402 self, 5403 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5404 anonymous: bool = False, 5405 optional_parens: bool = True, 5406 any_token: bool = False, 5407 ) -> t.Optional[exp.Expression]: 5408 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5409 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5410 fn_syntax = False 5411 if ( 5412 self._match(TokenType.L_BRACE, advance=False) 5413 and self._next 5414 and self._next.text.upper() == "FN" 5415 ): 5416 self._advance(2) 5417 fn_syntax = True 5418 5419 func = self._parse_function_call( 5420 functions=functions, 5421 anonymous=anonymous, 5422 optional_parens=optional_parens, 5423 any_token=any_token, 5424 ) 5425 5426 if fn_syntax: 5427 self._match(TokenType.R_BRACE) 5428 5429 return func 5430 5431 def _parse_function_call( 5432 self, 5433 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5434 anonymous: bool = False, 5435 optional_parens: bool = True, 5436 any_token: bool = False, 5437 ) -> t.Optional[exp.Expression]: 5438 if not self._curr: 5439 return None 5440 5441 comments = self._curr.comments 5442 token_type = self._curr.token_type 5443 this = self._curr.text 5444 upper = this.upper() 5445 5446 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5447 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5448 self._advance() 5449 return self._parse_window(parser(self)) 5450 5451 if not self._next or self._next.token_type != TokenType.L_PAREN: 5452 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5453 self._advance() 5454 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5455 5456 return None 5457 5458 if any_token: 5459 if token_type in self.RESERVED_TOKENS: 5460 return None 5461 elif token_type not in self.FUNC_TOKENS: 5462 return None 5463 5464 self._advance(2) 5465 5466 parser = self.FUNCTION_PARSERS.get(upper) 5467 if parser and not anonymous: 5468 this = parser(self) 
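# Hedged illustration of this dispatch (assumes the public sqlglot API;
# MY_UDF is a made-up name): registered names such as EXTRACT are handled
# by a FUNCTION_PARSERS entry in this branch, while unknown names fall
# through to the generic argument parsing below and surface as exp.Anonymous.
#
#     >>> import sqlglot
#     >>> type(sqlglot.parse_one("SELECT EXTRACT(YEAR FROM d)").selects[0]).__name__
#     'Extract'
#     >>> type(sqlglot.parse_one("SELECT MY_UDF(1, 2)").selects[0]).__name__  # made-up UDF
#     'Anonymous'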
5469 else: 5470 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5471 5472 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5473 this = self.expression( 5474 subquery_predicate, comments=comments, this=self._parse_select() 5475 ) 5476 self._match_r_paren() 5477 return this 5478 5479 if functions is None: 5480 functions = self.FUNCTIONS 5481 5482 function = functions.get(upper) 5483 known_function = function and not anonymous 5484 5485 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5486 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5487 5488 post_func_comments = self._curr and self._curr.comments 5489 if known_function and post_func_comments: 5490 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5491 # call we'll construct it as exp.Anonymous, even if it's "known" 5492 if any( 5493 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5494 for comment in post_func_comments 5495 ): 5496 known_function = False 5497 5498 if alias and known_function: 5499 args = self._kv_to_prop_eq(args) 5500 5501 if known_function: 5502 func_builder = t.cast(t.Callable, function) 5503 5504 if "dialect" in func_builder.__code__.co_varnames: 5505 func = func_builder(args, dialect=self.dialect) 5506 else: 5507 func = func_builder(args) 5508 5509 func = self.validate_expression(func, args) 5510 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5511 func.meta["name"] = this 5512 5513 this = func 5514 else: 5515 if token_type == TokenType.IDENTIFIER: 5516 this = exp.Identifier(this=this, quoted=True) 5517 this = self.expression(exp.Anonymous, this=this, expressions=args) 5518 5519 if isinstance(this, exp.Expression): 5520 this.add_comments(comments) 5521 5522 self._match_r_paren(this) 5523 return self._parse_window(this) 5524 5525 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5526 return expression 5527 5528 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5529 transformed = [] 5530 5531 for index, e in enumerate(expressions): 5532 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5533 if isinstance(e, exp.Alias): 5534 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5535 5536 if not isinstance(e, exp.PropertyEQ): 5537 e = self.expression( 5538 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5539 ) 5540 5541 if isinstance(e.this, exp.Column): 5542 e.this.replace(e.this.this) 5543 else: 5544 e = self._to_prop_eq(e, index) 5545 5546 transformed.append(e) 5547 5548 return transformed 5549 5550 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5551 return self._parse_statement() 5552 5553 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5554 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5555 5556 def _parse_user_defined_function( 5557 self, kind: t.Optional[TokenType] = None 5558 ) -> t.Optional[exp.Expression]: 5559 this = self._parse_id_var() 5560 5561 while self._match(TokenType.DOT): 5562 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 5563 5564 if not self._match(TokenType.L_PAREN): 5565 return this 5566 5567 expressions = self._parse_csv(self._parse_function_parameter) 5568 self._match_r_paren() 5569 return self.expression( 5570 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5571 ) 5572 5573 def _parse_introducer(self, token: Token) -> 
exp.Introducer | exp.Identifier: 5574 literal = self._parse_primary() 5575 if literal: 5576 return self.expression(exp.Introducer, this=token.text, expression=literal) 5577 5578 return self.expression(exp.Identifier, this=token.text) 5579 5580 def _parse_session_parameter(self) -> exp.SessionParameter: 5581 kind = None 5582 this = self._parse_id_var() or self._parse_primary() 5583 5584 if this and self._match(TokenType.DOT): 5585 kind = this.name 5586 this = self._parse_var() or self._parse_primary() 5587 5588 return self.expression(exp.SessionParameter, this=this, kind=kind) 5589 5590 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5591 return self._parse_id_var() 5592 5593 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5594 index = self._index 5595 5596 if self._match(TokenType.L_PAREN): 5597 expressions = t.cast( 5598 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5599 ) 5600 5601 if not self._match(TokenType.R_PAREN): 5602 self._retreat(index) 5603 else: 5604 expressions = [self._parse_lambda_arg()] 5605 5606 if self._match_set(self.LAMBDAS): 5607 return self.LAMBDAS[self._prev.token_type](self, expressions) 5608 5609 self._retreat(index) 5610 5611 this: t.Optional[exp.Expression] 5612 5613 if self._match(TokenType.DISTINCT): 5614 this = self.expression( 5615 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5616 ) 5617 else: 5618 this = self._parse_select_or_expression(alias=alias) 5619 5620 return self._parse_limit( 5621 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5622 ) 5623 5624 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5625 index = self._index 5626 if not self._match(TokenType.L_PAREN): 5627 return this 5628 5629 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 5630 # expr can be of both types 5631 if self._match_set(self.SELECT_START_TOKENS): 5632 self._retreat(index) 5633 return this 5634 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5635 self._match_r_paren() 5636 return self.expression(exp.Schema, this=this, expressions=args) 5637 5638 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5639 return self._parse_column_def(self._parse_field(any_token=True)) 5640 5641 def _parse_column_def( 5642 self, this: t.Optional[exp.Expression], computed_column: bool = True 5643 ) -> t.Optional[exp.Expression]: 5644 # column defs are not really columns, they're identifiers 5645 if isinstance(this, exp.Column): 5646 this = this.this 5647 5648 if not computed_column: 5649 self._match(TokenType.ALIAS) 5650 5651 kind = self._parse_types(schema=True) 5652 5653 if self._match_text_seq("FOR", "ORDINALITY"): 5654 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5655 5656 constraints: t.List[exp.Expression] = [] 5657 5658 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5659 ("ALIAS", "MATERIALIZED") 5660 ): 5661 persisted = self._prev.text.upper() == "MATERIALIZED" 5662 constraint_kind = exp.ComputedColumnConstraint( 5663 this=self._parse_assignment(), 5664 persisted=persisted or self._match_text_seq("PERSISTED"), 5665 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5666 ) 5667 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5668 elif ( 5669 kind 5670 and self._match(TokenType.ALIAS, advance=False) 5671 and ( 5672 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 5673 or (self._next and self._next.token_type == TokenType.L_PAREN) 5674 ) 5675 ): 5676 self._advance() 5677 constraints.append( 5678 self.expression( 5679 exp.ColumnConstraint, 5680 kind=exp.TransformColumnConstraint(this=self._parse_disjunction()), 5681 ) 5682 ) 5683 5684 while True: 5685 constraint = self._parse_column_constraint() 5686 if not constraint: 5687 break 5688 constraints.append(constraint) 5689 5690 if not kind and not constraints: 5691 return this 5692 5693 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5694 5695 def _parse_auto_increment( 5696 self, 5697 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5698 start = None 5699 increment = None 5700 5701 if self._match(TokenType.L_PAREN, advance=False): 5702 args = self._parse_wrapped_csv(self._parse_bitwise) 5703 start = seq_get(args, 0) 5704 increment = seq_get(args, 1) 5705 elif self._match_text_seq("START"): 5706 start = self._parse_bitwise() 5707 self._match_text_seq("INCREMENT") 5708 increment = self._parse_bitwise() 5709 5710 if start and increment: 5711 return exp.GeneratedAsIdentityColumnConstraint( 5712 start=start, increment=increment, this=False 5713 ) 5714 5715 return exp.AutoIncrementColumnConstraint() 5716 5717 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5718 if not self._match_text_seq("REFRESH"): 5719 self._retreat(self._index - 1) 5720 return None 5721 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5722 5723 def _parse_compress(self) -> exp.CompressColumnConstraint: 5724 if self._match(TokenType.L_PAREN, advance=False): 5725 return self.expression( 5726 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5727 ) 5728 5729 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5730 5731 def 
_parse_generated_as_identity( 5732 self, 5733 ) -> ( 5734 exp.GeneratedAsIdentityColumnConstraint 5735 | exp.ComputedColumnConstraint 5736 | exp.GeneratedAsRowColumnConstraint 5737 ): 5738 if self._match_text_seq("BY", "DEFAULT"): 5739 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5740 this = self.expression( 5741 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5742 ) 5743 else: 5744 self._match_text_seq("ALWAYS") 5745 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5746 5747 self._match(TokenType.ALIAS) 5748 5749 if self._match_text_seq("ROW"): 5750 start = self._match_text_seq("START") 5751 if not start: 5752 self._match(TokenType.END) 5753 hidden = self._match_text_seq("HIDDEN") 5754 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5755 5756 identity = self._match_text_seq("IDENTITY") 5757 5758 if self._match(TokenType.L_PAREN): 5759 if self._match(TokenType.START_WITH): 5760 this.set("start", self._parse_bitwise()) 5761 if self._match_text_seq("INCREMENT", "BY"): 5762 this.set("increment", self._parse_bitwise()) 5763 if self._match_text_seq("MINVALUE"): 5764 this.set("minvalue", self._parse_bitwise()) 5765 if self._match_text_seq("MAXVALUE"): 5766 this.set("maxvalue", self._parse_bitwise()) 5767 5768 if self._match_text_seq("CYCLE"): 5769 this.set("cycle", True) 5770 elif self._match_text_seq("NO", "CYCLE"): 5771 this.set("cycle", False) 5772 5773 if not identity: 5774 this.set("expression", self._parse_range()) 5775 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5776 args = self._parse_csv(self._parse_bitwise) 5777 this.set("start", seq_get(args, 0)) 5778 this.set("increment", seq_get(args, 1)) 5779 5780 self._match_r_paren() 5781 5782 return this 5783 5784 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5785 self._match_text_seq("LENGTH") 5786 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5787 5788 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5789 if self._match_text_seq("NULL"): 5790 return self.expression(exp.NotNullColumnConstraint) 5791 if self._match_text_seq("CASESPECIFIC"): 5792 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5793 if self._match_text_seq("FOR", "REPLICATION"): 5794 return self.expression(exp.NotForReplicationColumnConstraint) 5795 5796 # Unconsume the `NOT` token 5797 self._retreat(self._index - 1) 5798 return None 5799 5800 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5801 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 5802 5803 procedure_option_follows = ( 5804 self._match(TokenType.WITH, advance=False) 5805 and self._next 5806 and self._next.text.upper() in self.PROCEDURE_OPTIONS 5807 ) 5808 5809 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 5810 return self.expression( 5811 exp.ColumnConstraint, 5812 this=this, 5813 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5814 ) 5815 5816 return this 5817 5818 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5819 if not self._match(TokenType.CONSTRAINT): 5820 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5821 5822 return self.expression( 5823 exp.Constraint, 5824 this=self._parse_id_var(), 5825 expressions=self._parse_unnamed_constraints(), 5826 ) 5827 5828 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5829 constraints = [] 5830 while True: 5831 
constraint = self._parse_unnamed_constraint() or self._parse_function() 5832 if not constraint: 5833 break 5834 constraints.append(constraint) 5835 5836 return constraints 5837 5838 def _parse_unnamed_constraint( 5839 self, constraints: t.Optional[t.Collection[str]] = None 5840 ) -> t.Optional[exp.Expression]: 5841 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5842 constraints or self.CONSTRAINT_PARSERS 5843 ): 5844 return None 5845 5846 constraint = self._prev.text.upper() 5847 if constraint not in self.CONSTRAINT_PARSERS: 5848 self.raise_error(f"No parser found for schema constraint {constraint}.") 5849 5850 return self.CONSTRAINT_PARSERS[constraint](self) 5851 5852 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5853 return self._parse_id_var(any_token=False) 5854 5855 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5856 self._match_text_seq("KEY") 5857 return self.expression( 5858 exp.UniqueColumnConstraint, 5859 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5860 this=self._parse_schema(self._parse_unique_key()), 5861 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5862 on_conflict=self._parse_on_conflict(), 5863 ) 5864 5865 def _parse_key_constraint_options(self) -> t.List[str]: 5866 options = [] 5867 while True: 5868 if not self._curr: 5869 break 5870 5871 if self._match(TokenType.ON): 5872 action = None 5873 on = self._advance_any() and self._prev.text 5874 5875 if self._match_text_seq("NO", "ACTION"): 5876 action = "NO ACTION" 5877 elif self._match_text_seq("CASCADE"): 5878 action = "CASCADE" 5879 elif self._match_text_seq("RESTRICT"): 5880 action = "RESTRICT" 5881 elif self._match_pair(TokenType.SET, TokenType.NULL): 5882 action = "SET NULL" 5883 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5884 action = "SET DEFAULT" 5885 else: 5886 self.raise_error("Invalid key constraint") 5887 5888 options.append(f"ON {on} {action}") 5889 else: 5890 var = self._parse_var_from_options( 5891 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5892 ) 5893 if not var: 5894 break 5895 options.append(var.name) 5896 5897 return options 5898 5899 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5900 if match and not self._match(TokenType.REFERENCES): 5901 return None 5902 5903 expressions = None 5904 this = self._parse_table(schema=True) 5905 options = self._parse_key_constraint_options() 5906 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5907 5908 def _parse_foreign_key(self) -> exp.ForeignKey: 5909 expressions = self._parse_wrapped_id_vars() 5910 reference = self._parse_references() 5911 options = {} 5912 5913 while self._match(TokenType.ON): 5914 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5915 self.raise_error("Expected DELETE or UPDATE") 5916 5917 kind = self._prev.text.lower() 5918 5919 if self._match_text_seq("NO", "ACTION"): 5920 action = "NO ACTION" 5921 elif self._match(TokenType.SET): 5922 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5923 action = "SET " + self._prev.text.upper() 5924 else: 5925 self._advance() 5926 action = self._prev.text.upper() 5927 5928 options[kind] = action 5929 5930 return self.expression( 5931 exp.ForeignKey, 5932 expressions=expressions, 5933 reference=reference, 5934 **options, # type: ignore 5935 ) 5936 5937 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5938 return self._parse_ordered() or self._parse_field() 5939 5940 def 
_parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5941 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5942 self._retreat(self._index - 1) 5943 return None 5944 5945 id_vars = self._parse_wrapped_id_vars() 5946 return self.expression( 5947 exp.PeriodForSystemTimeConstraint, 5948 this=seq_get(id_vars, 0), 5949 expression=seq_get(id_vars, 1), 5950 ) 5951 5952 def _parse_primary_key( 5953 self, wrapped_optional: bool = False, in_props: bool = False 5954 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5955 desc = ( 5956 self._match_set((TokenType.ASC, TokenType.DESC)) 5957 and self._prev.token_type == TokenType.DESC 5958 ) 5959 5960 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5961 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5962 5963 expressions = self._parse_wrapped_csv( 5964 self._parse_primary_key_part, optional=wrapped_optional 5965 ) 5966 options = self._parse_key_constraint_options() 5967 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5968 5969 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5970 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5971 5972 def _parse_odbc_datetime_literal(self) -> exp.Expression: 5973 """ 5974 Parses a datetime column in ODBC format. The column is parsed into the corresponding 5975 expression type; for example, `{d'yyyy-mm-dd'}` is parsed as a `Date` column, exactly 5976 as `DATE('yyyy-mm-dd')` would be. 5977 5978 Reference: 5979 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 5980 """ 5981 self._match(TokenType.VAR) 5982 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 5983 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 5984 if not self._match(TokenType.R_BRACE): 5985 self.raise_error("Expected }") 5986 return expression 5987 5988 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5989 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5990 return this 5991 5992 bracket_kind = self._prev.token_type 5993 if ( 5994 bracket_kind == TokenType.L_BRACE 5995 and self._curr 5996 and self._curr.token_type == TokenType.VAR 5997 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 5998 ): 5999 return self._parse_odbc_datetime_literal() 6000 6001 expressions = self._parse_csv( 6002 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6003 ) 6004 6005 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6006 self.raise_error("Expected ]") 6007 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6008 self.raise_error("Expected }") 6009 6010 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6011 if bracket_kind == TokenType.L_BRACE: 6012 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 6013 elif not this: 6014 this = build_array_constructor( 6015 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6016 ) 6017 else: 6018 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6019 if constructor_type: 6020 return build_array_constructor( 6021 constructor_type, 6022 args=expressions, 6023 bracket_kind=bracket_kind, 6024 dialect=self.dialect, 6025 ) 6026 6027 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 6028
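# Note on the call above: apply_index_offset shifts literal subscripts by
# -INDEX_OFFSET, so bracket indices are stored 0-based in the AST regardless
# of the read dialect's base, and the generator re-applies the write
# dialect's offset on output. A minimal sketch, assuming Presto's 1-based
# arrays: "x[1]" read as Presto denotes the same element as "x[0]" read
# with a 0-based dialect.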
this = self.expression(exp.Bracket, this=this, expressions=expressions) 6029 6030 self._add_comments(this) 6031 return self._parse_bracket(this) 6032 6033 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6034 if self._match(TokenType.COLON): 6035 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6036 return this 6037 6038 def _parse_case(self) -> t.Optional[exp.Expression]: 6039 ifs = [] 6040 default = None 6041 6042 comments = self._prev_comments 6043 expression = self._parse_assignment() 6044 6045 while self._match(TokenType.WHEN): 6046 this = self._parse_assignment() 6047 self._match(TokenType.THEN) 6048 then = self._parse_assignment() 6049 ifs.append(self.expression(exp.If, this=this, true=then)) 6050 6051 if self._match(TokenType.ELSE): 6052 default = self._parse_assignment() 6053 6054 if not self._match(TokenType.END): 6055 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6056 default = exp.column("interval") 6057 else: 6058 self.raise_error("Expected END after CASE", self._prev) 6059 6060 return self.expression( 6061 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6062 ) 6063 6064 def _parse_if(self) -> t.Optional[exp.Expression]: 6065 if self._match(TokenType.L_PAREN): 6066 args = self._parse_csv(self._parse_assignment) 6067 this = self.validate_expression(exp.If.from_arg_list(args), args) 6068 self._match_r_paren() 6069 else: 6070 index = self._index - 1 6071 6072 if self.NO_PAREN_IF_COMMANDS and index == 0: 6073 return self._parse_as_command(self._prev) 6074 6075 condition = self._parse_assignment() 6076 6077 if not condition: 6078 self._retreat(index) 6079 return None 6080 6081 self._match(TokenType.THEN) 6082 true = self._parse_assignment() 6083 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6084 self._match(TokenType.END) 6085 this = self.expression(exp.If, this=condition, true=true, false=false) 6086 6087 return this 6088 6089 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6090 if not self._match_text_seq("VALUE", "FOR"): 6091 self._retreat(self._index - 1) 6092 return None 6093 6094 return self.expression( 6095 exp.NextValueFor, 6096 this=self._parse_column(), 6097 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6098 ) 6099 6100 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6101 this = self._parse_function() or self._parse_var_or_string(upper=True) 6102 6103 if self._match(TokenType.FROM): 6104 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6105 6106 if not self._match(TokenType.COMMA): 6107 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6108 6109 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6110 6111 def _parse_gap_fill(self) -> exp.GapFill: 6112 self._match(TokenType.TABLE) 6113 this = self._parse_table() 6114 6115 self._match(TokenType.COMMA) 6116 args = [this, *self._parse_csv(self._parse_lambda)] 6117 6118 gap_fill = exp.GapFill.from_arg_list(args) 6119 return self.validate_expression(gap_fill, args) 6120 6121 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6122 this = self._parse_assignment() 6123 6124 if not self._match(TokenType.ALIAS): 6125 if self._match(TokenType.COMMA): 6126 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6127 6128 self.raise_error("Expected AS after CAST") 6129 6130 fmt = None 6131 to 
= self._parse_types() 6132 6133 default = self._match(TokenType.DEFAULT) 6134 if default: 6135 default = self._parse_bitwise() 6136 self._match_text_seq("ON", "CONVERSION", "ERROR") 6137 6138 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6139 fmt_string = self._parse_string() 6140 fmt = self._parse_at_time_zone(fmt_string) 6141 6142 if not to: 6143 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6144 if to.this in exp.DataType.TEMPORAL_TYPES: 6145 this = self.expression( 6146 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6147 this=this, 6148 format=exp.Literal.string( 6149 format_time( 6150 fmt_string.this if fmt_string else "", 6151 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6152 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6153 ) 6154 ), 6155 safe=safe, 6156 ) 6157 6158 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6159 this.set("zone", fmt.args["zone"]) 6160 return this 6161 elif not to: 6162 self.raise_error("Expected TYPE after CAST") 6163 elif isinstance(to, exp.Identifier): 6164 to = exp.DataType.build(to.name, udt=True) 6165 elif to.this == exp.DataType.Type.CHAR: 6166 if self._match(TokenType.CHARACTER_SET): 6167 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6168 6169 return self.expression( 6170 exp.Cast if strict else exp.TryCast, 6171 this=this, 6172 to=to, 6173 format=fmt, 6174 safe=safe, 6175 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6176 default=default, 6177 ) 6178 6179 def _parse_string_agg(self) -> exp.GroupConcat: 6180 if self._match(TokenType.DISTINCT): 6181 args: t.List[t.Optional[exp.Expression]] = [ 6182 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6183 ] 6184 if self._match(TokenType.COMMA): 6185 args.extend(self._parse_csv(self._parse_assignment)) 6186 else: 6187 args = self._parse_csv(self._parse_assignment) # type: ignore 6188 6189 if self._match_text_seq("ON", "OVERFLOW"): 6190 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6191 if self._match_text_seq("ERROR"): 6192 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6193 else: 6194 self._match_text_seq("TRUNCATE") 6195 on_overflow = self.expression( 6196 exp.OverflowTruncateBehavior, 6197 this=self._parse_string(), 6198 with_count=( 6199 self._match_text_seq("WITH", "COUNT") 6200 or not self._match_text_seq("WITHOUT", "COUNT") 6201 ), 6202 ) 6203 else: 6204 on_overflow = None 6205 6206 index = self._index 6207 if not self._match(TokenType.R_PAREN) and args: 6208 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6209 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6210 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 6211 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6212 6213 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6214 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6215 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
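# Illustrative sketch (assuming STRING_AGG is wired to this method via the
# base FUNCTION_PARSERS): both calls below produce an exp.GroupConcat, with
# the trailing ORDER BY folded into the last argument as described above.
#
#   import sqlglot
#   sqlglot.parse_one("SELECT STRING_AGG(x, ',')", read="postgres")
#   sqlglot.parse_one("SELECT STRING_AGG(x, ',' ORDER BY x)", read="bigquery")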
6216 if not self._match_text_seq("WITHIN", "GROUP"): 6217 self._retreat(index) 6218 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6219 6220 # The corresponding match_r_paren will be called in parse_function (caller) 6221 self._match_l_paren() 6222 6223 return self.expression( 6224 exp.GroupConcat, 6225 this=self._parse_order(this=seq_get(args, 0)), 6226 separator=seq_get(args, 1), 6227 on_overflow=on_overflow, 6228 ) 6229 6230 def _parse_convert( 6231 self, strict: bool, safe: t.Optional[bool] = None 6232 ) -> t.Optional[exp.Expression]: 6233 this = self._parse_bitwise() 6234 6235 if self._match(TokenType.USING): 6236 to: t.Optional[exp.Expression] = self.expression( 6237 exp.CharacterSet, this=self._parse_var() 6238 ) 6239 elif self._match(TokenType.COMMA): 6240 to = self._parse_types() 6241 else: 6242 to = None 6243 6244 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 6245 6246 def _parse_xml_table(self) -> exp.XMLTable: 6247 namespaces = None 6248 passing = None 6249 columns = None 6250 6251 if self._match_text_seq("XMLNAMESPACES", "("): 6252 namespaces = self._parse_xml_namespace() 6253 self._match_text_seq(")", ",") 6254 6255 this = self._parse_string() 6256 6257 if self._match_text_seq("PASSING"): 6258 # The BY VALUE keywords are optional and are provided for semantic clarity 6259 self._match_text_seq("BY", "VALUE") 6260 passing = self._parse_csv(self._parse_column) 6261 6262 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6263 6264 if self._match_text_seq("COLUMNS"): 6265 columns = self._parse_csv(self._parse_field_def) 6266 6267 return self.expression( 6268 exp.XMLTable, 6269 this=this, 6270 namespaces=namespaces, 6271 passing=passing, 6272 columns=columns, 6273 by_ref=by_ref, 6274 ) 6275 6276 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6277 namespaces = [] 6278 6279 while True: 6280 if self._match(TokenType.DEFAULT): 6281 uri = self._parse_string() 6282 else: 6283 uri = self._parse_alias(self._parse_string()) 6284 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6285 if not self._match(TokenType.COMMA): 6286 break 6287 6288 return namespaces 6289 6290 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 6291 """ 6292 There are generally two variants of the DECODE function: 6293 6294 - DECODE(bin, charset) 6295 - DECODE(expression, search, result [, search, result] ... [, default]) 6296 6297 The second variant will always be parsed into a CASE expression. Note that NULL 6298 needs special treatment, since we need to explicitly check for it with `IS NULL`, 6299 instead of relying on pattern matching. 
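For example, `DECODE(x, 1, 'one', 2, 'two', 'other')` is parsed into
`CASE WHEN x = 1 THEN 'one' WHEN x = 2 THEN 'two' ELSE 'other' END`.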
6300 """ 6301 args = self._parse_csv(self._parse_assignment) 6302 6303 if len(args) < 3: 6304 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6305 6306 expression, *expressions = args 6307 if not expression: 6308 return None 6309 6310 ifs = [] 6311 for search, result in zip(expressions[::2], expressions[1::2]): 6312 if not search or not result: 6313 return None 6314 6315 if isinstance(search, exp.Literal): 6316 ifs.append( 6317 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 6318 ) 6319 elif isinstance(search, exp.Null): 6320 ifs.append( 6321 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 6322 ) 6323 else: 6324 cond = exp.or_( 6325 exp.EQ(this=expression.copy(), expression=search), 6326 exp.and_( 6327 exp.Is(this=expression.copy(), expression=exp.Null()), 6328 exp.Is(this=search.copy(), expression=exp.Null()), 6329 copy=False, 6330 ), 6331 copy=False, 6332 ) 6333 ifs.append(exp.If(this=cond, true=result)) 6334 6335 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6336 6337 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6338 self._match_text_seq("KEY") 6339 key = self._parse_column() 6340 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6341 self._match_text_seq("VALUE") 6342 value = self._parse_bitwise() 6343 6344 if not key and not value: 6345 return None 6346 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6347 6348 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6349 if not this or not self._match_text_seq("FORMAT", "JSON"): 6350 return this 6351 6352 return self.expression(exp.FormatJson, this=this) 6353 6354 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6355 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6356 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6357 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6358 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6359 else: 6360 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6361 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6362 6363 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6364 6365 if not empty and not error and not null: 6366 return None 6367 6368 return self.expression( 6369 exp.OnCondition, 6370 empty=empty, 6371 error=error, 6372 null=null, 6373 ) 6374 6375 def _parse_on_handling( 6376 self, on: str, *values: str 6377 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6378 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6379 for value in values: 6380 if self._match_text_seq(value, "ON", on): 6381 return f"{value} ON {on}" 6382 6383 index = self._index 6384 if self._match(TokenType.DEFAULT): 6385 default_value = self._parse_bitwise() 6386 if self._match_text_seq("ON", on): 6387 return default_value 6388 6389 self._retreat(index) 6390 6391 return None 6392 6393 @t.overload 6394 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6395 6396 @t.overload 6397 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
6398 6399 def _parse_json_object(self, agg=False): 6400 star = self._parse_star() 6401 expressions = ( 6402 [star] 6403 if star 6404 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6405 ) 6406 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6407 6408 unique_keys = None 6409 if self._match_text_seq("WITH", "UNIQUE"): 6410 unique_keys = True 6411 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6412 unique_keys = False 6413 6414 self._match_text_seq("KEYS") 6415 6416 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6417 self._parse_type() 6418 ) 6419 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6420 6421 return self.expression( 6422 exp.JSONObjectAgg if agg else exp.JSONObject, 6423 expressions=expressions, 6424 null_handling=null_handling, 6425 unique_keys=unique_keys, 6426 return_type=return_type, 6427 encoding=encoding, 6428 ) 6429 6430 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6431 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6432 if not self._match_text_seq("NESTED"): 6433 this = self._parse_id_var() 6434 kind = self._parse_types(allow_identifiers=False) 6435 nested = None 6436 else: 6437 this = None 6438 kind = None 6439 nested = True 6440 6441 path = self._match_text_seq("PATH") and self._parse_string() 6442 nested_schema = nested and self._parse_json_schema() 6443 6444 return self.expression( 6445 exp.JSONColumnDef, 6446 this=this, 6447 kind=kind, 6448 path=path, 6449 nested_schema=nested_schema, 6450 ) 6451 6452 def _parse_json_schema(self) -> exp.JSONSchema: 6453 self._match_text_seq("COLUMNS") 6454 return self.expression( 6455 exp.JSONSchema, 6456 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6457 ) 6458 6459 def _parse_json_table(self) -> exp.JSONTable: 6460 this = self._parse_format_json(self._parse_bitwise()) 6461 path = self._match(TokenType.COMMA) and self._parse_string() 6462 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6463 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6464 schema = self._parse_json_schema() 6465 6466 return exp.JSONTable( 6467 this=this, 6468 schema=schema, 6469 path=path, 6470 error_handling=error_handling, 6471 empty_handling=empty_handling, 6472 ) 6473 6474 def _parse_match_against(self) -> exp.MatchAgainst: 6475 expressions = self._parse_csv(self._parse_column) 6476 6477 self._match_text_seq(")", "AGAINST", "(") 6478 6479 this = self._parse_string() 6480 6481 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6482 modifier = "IN NATURAL LANGUAGE MODE" 6483 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6484 modifier = f"{modifier} WITH QUERY EXPANSION" 6485 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6486 modifier = "IN BOOLEAN MODE" 6487 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6488 modifier = "WITH QUERY EXPANSION" 6489 else: 6490 modifier = None 6491 6492 return self.expression( 6493 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6494 ) 6495 6496 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6497 def _parse_open_json(self) -> exp.OpenJSON: 6498 this = self._parse_bitwise() 6499 path = self._match(TokenType.COMMA) and self._parse_string() 6500 6501 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6502 this = self._parse_field(any_token=True) 6503 kind = self._parse_types() 6504 path = 
self._parse_string() 6505 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6506 6507 return self.expression( 6508 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6509 ) 6510 6511 expressions = None 6512 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6513 self._match_l_paren() 6514 expressions = self._parse_csv(_parse_open_json_column_def) 6515 6516 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6517 6518 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6519 args = self._parse_csv(self._parse_bitwise) 6520 6521 if self._match(TokenType.IN): 6522 return self.expression( 6523 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6524 ) 6525 6526 if haystack_first: 6527 haystack = seq_get(args, 0) 6528 needle = seq_get(args, 1) 6529 else: 6530 haystack = seq_get(args, 1) 6531 needle = seq_get(args, 0) 6532 6533 return self.expression( 6534 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6535 ) 6536 6537 def _parse_predict(self) -> exp.Predict: 6538 self._match_text_seq("MODEL") 6539 this = self._parse_table() 6540 6541 self._match(TokenType.COMMA) 6542 self._match_text_seq("TABLE") 6543 6544 return self.expression( 6545 exp.Predict, 6546 this=this, 6547 expression=self._parse_table(), 6548 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6549 ) 6550 6551 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6552 args = self._parse_csv(self._parse_table) 6553 return exp.JoinHint(this=func_name.upper(), expressions=args) 6554 6555 def _parse_substring(self) -> exp.Substring: 6556 # Postgres supports the form: substring(string [from int] [for int]) 6557 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6558 6559 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6560 6561 if self._match(TokenType.FROM): 6562 args.append(self._parse_bitwise()) 6563 if self._match(TokenType.FOR): 6564 if len(args) == 1: 6565 args.append(exp.Literal.number(1)) 6566 args.append(self._parse_bitwise()) 6567 6568 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6569 6570 def _parse_trim(self) -> exp.Trim: 6571 # https://www.w3resource.com/sql/character-functions/trim.php 6572 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6573 6574 position = None 6575 collation = None 6576 expression = None 6577 6578 if self._match_texts(self.TRIM_TYPES): 6579 position = self._prev.text.upper() 6580 6581 this = self._parse_bitwise() 6582 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6583 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6584 expression = self._parse_bitwise() 6585 6586 if invert_order: 6587 this, expression = expression, this 6588 6589 if self._match(TokenType.COLLATE): 6590 collation = self._parse_bitwise() 6591 6592 return self.expression( 6593 exp.Trim, this=this, position=position, expression=expression, collation=collation 6594 ) 6595 6596 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6597 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6598 6599 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6600 return self._parse_window(self._parse_id_var(), alias=True) 6601 6602 def _parse_respect_or_ignore_nulls( 6603 self, this: t.Optional[exp.Expression] 6604 ) -> t.Optional[exp.Expression]: 6605 if self._match_text_seq("IGNORE", "NULLS"): 
6606 return self.expression(exp.IgnoreNulls, this=this) 6607 if self._match_text_seq("RESPECT", "NULLS"): 6608 return self.expression(exp.RespectNulls, this=this) 6609 return this 6610 6611 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6612 if self._match(TokenType.HAVING): 6613 self._match_texts(("MAX", "MIN")) 6614 max = self._prev.text.upper() != "MIN" 6615 return self.expression( 6616 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6617 ) 6618 6619 return this 6620 6621 def _parse_window( 6622 self, this: t.Optional[exp.Expression], alias: bool = False 6623 ) -> t.Optional[exp.Expression]: 6624 func = this 6625 comments = func.comments if isinstance(func, exp.Expression) else None 6626 6627 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6628 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6629 if self._match_text_seq("WITHIN", "GROUP"): 6630 order = self._parse_wrapped(self._parse_order) 6631 this = self.expression(exp.WithinGroup, this=this, expression=order) 6632 6633 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6634 self._match(TokenType.WHERE) 6635 this = self.expression( 6636 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6637 ) 6638 self._match_r_paren() 6639 6640 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6641 # Some dialects choose to implement it and some do not. 6642 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6643 6644 # There is some code above in _parse_lambda that handles 6645 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6646 6647 # The code below handles 6648 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6649 6650 # Oracle allows both formats 6651 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6652 # and Snowflake chose to do the same for familiarity 6653 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6654 if isinstance(this, exp.AggFunc): 6655 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6656 6657 if ignore_respect and ignore_respect is not this: 6658 ignore_respect.replace(ignore_respect.this) 6659 this = self.expression(ignore_respect.__class__, this=this) 6660 6661 this = self._parse_respect_or_ignore_nulls(this) 6662 6663 # bigquery select from window x AS (partition by ...)
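# When alias=True (via _parse_named_window above), this parses a named
# window definition such as the trailing WINDOW w AS (PARTITION BY a ORDER BY b)
# clause; otherwise a token from WINDOW_BEFORE_PAREN_TOKENS (e.g. OVER) must
# precede the parenthesized spec.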
6664 if alias: 6665 over = None 6666 self._match(TokenType.ALIAS) 6667 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6668 return this 6669 else: 6670 over = self._prev.text.upper() 6671 6672 if comments and isinstance(func, exp.Expression): 6673 func.pop_comments() 6674 6675 if not self._match(TokenType.L_PAREN): 6676 return self.expression( 6677 exp.Window, 6678 comments=comments, 6679 this=this, 6680 alias=self._parse_id_var(False), 6681 over=over, 6682 ) 6683 6684 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6685 6686 first = self._match(TokenType.FIRST) 6687 if self._match_text_seq("LAST"): 6688 first = False 6689 6690 partition, order = self._parse_partition_and_order() 6691 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6692 6693 if kind: 6694 self._match(TokenType.BETWEEN) 6695 start = self._parse_window_spec() 6696 self._match(TokenType.AND) 6697 end = self._parse_window_spec() 6698 6699 spec = self.expression( 6700 exp.WindowSpec, 6701 kind=kind, 6702 start=start["value"], 6703 start_side=start["side"], 6704 end=end["value"], 6705 end_side=end["side"], 6706 ) 6707 else: 6708 spec = None 6709 6710 self._match_r_paren() 6711 6712 window = self.expression( 6713 exp.Window, 6714 comments=comments, 6715 this=this, 6716 partition_by=partition, 6717 order=order, 6718 spec=spec, 6719 alias=window_alias, 6720 over=over, 6721 first=first, 6722 ) 6723 6724 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6725 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6726 return self._parse_window(window, alias=alias) 6727 6728 return window 6729 6730 def _parse_partition_and_order( 6731 self, 6732 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6733 return self._parse_partition_by(), self._parse_order() 6734 6735 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6736 self._match(TokenType.BETWEEN) 6737 6738 return { 6739 "value": ( 6740 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6741 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6742 or self._parse_bitwise() 6743 ), 6744 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6745 } 6746 6747 def _parse_alias( 6748 self, this: t.Optional[exp.Expression], explicit: bool = False 6749 ) -> t.Optional[exp.Expression]: 6750 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 6751 # so this section tries to parse the clause version and if it fails, it treats the token 6752 # as an identifier (alias) 6753 if self._can_parse_limit_or_offset(): 6754 return this 6755 6756 any_token = self._match(TokenType.ALIAS) 6757 comments = self._prev_comments or [] 6758 6759 if explicit and not any_token: 6760 return this 6761 6762 if self._match(TokenType.L_PAREN): 6763 aliases = self.expression( 6764 exp.Aliases, 6765 comments=comments, 6766 this=this, 6767 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6768 ) 6769 self._match_r_paren(aliases) 6770 return aliases 6771 6772 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6773 self.STRING_ALIASES and self._parse_string_as_identifier() 6774 ) 6775 6776 if alias: 6777 comments.extend(alias.pop_comments()) 6778 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6779 column = this.this 6780 6781 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6782 if not this.comments and column and 
column.comments: 6783 this.comments = column.pop_comments() 6784 6785 return this 6786 6787 def _parse_id_var( 6788 self, 6789 any_token: bool = True, 6790 tokens: t.Optional[t.Collection[TokenType]] = None, 6791 ) -> t.Optional[exp.Expression]: 6792 expression = self._parse_identifier() 6793 if not expression and ( 6794 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6795 ): 6796 quoted = self._prev.token_type == TokenType.STRING 6797 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6798 6799 return expression 6800 6801 def _parse_string(self) -> t.Optional[exp.Expression]: 6802 if self._match_set(self.STRING_PARSERS): 6803 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6804 return self._parse_placeholder() 6805 6806 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6807 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6808 6809 def _parse_number(self) -> t.Optional[exp.Expression]: 6810 if self._match_set(self.NUMERIC_PARSERS): 6811 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6812 return self._parse_placeholder() 6813 6814 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6815 if self._match(TokenType.IDENTIFIER): 6816 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6817 return self._parse_placeholder() 6818 6819 def _parse_var( 6820 self, 6821 any_token: bool = False, 6822 tokens: t.Optional[t.Collection[TokenType]] = None, 6823 upper: bool = False, 6824 ) -> t.Optional[exp.Expression]: 6825 if ( 6826 (any_token and self._advance_any()) 6827 or self._match(TokenType.VAR) 6828 or (self._match_set(tokens) if tokens else False) 6829 ): 6830 return self.expression( 6831 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6832 ) 6833 return self._parse_placeholder() 6834 6835 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6836 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6837 self._advance() 6838 return self._prev 6839 return None 6840 6841 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6842 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6843 6844 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6845 return self._parse_primary() or self._parse_var(any_token=True) 6846 6847 def _parse_null(self) -> t.Optional[exp.Expression]: 6848 if self._match_set(self.NULL_TOKENS): 6849 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6850 return self._parse_placeholder() 6851 6852 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6853 if self._match(TokenType.TRUE): 6854 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6855 if self._match(TokenType.FALSE): 6856 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6857 return self._parse_placeholder() 6858 6859 def _parse_star(self) -> t.Optional[exp.Expression]: 6860 if self._match(TokenType.STAR): 6861 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6862 return self._parse_placeholder() 6863 6864 def _parse_parameter(self) -> exp.Parameter: 6865 this = self._parse_identifier() or self._parse_primary_or_var() 6866 return self.expression(exp.Parameter, this=this) 6867 6868 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6869 if self._match_set(self.PLACEHOLDER_PARSERS): 6870 placeholder = 
self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6871 if placeholder: 6872 return placeholder 6873 self._advance(-1) 6874 return None 6875 6876 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6877 if not self._match_texts(keywords): 6878 return None 6879 if self._match(TokenType.L_PAREN, advance=False): 6880 return self._parse_wrapped_csv(self._parse_expression) 6881 6882 expression = self._parse_expression() 6883 return [expression] if expression else None 6884 6885 def _parse_csv( 6886 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6887 ) -> t.List[exp.Expression]: 6888 parse_result = parse_method() 6889 items = [parse_result] if parse_result is not None else [] 6890 6891 while self._match(sep): 6892 self._add_comments(parse_result) 6893 parse_result = parse_method() 6894 if parse_result is not None: 6895 items.append(parse_result) 6896 6897 return items 6898 6899 def _parse_tokens( 6900 self, parse_method: t.Callable, expressions: t.Dict 6901 ) -> t.Optional[exp.Expression]: 6902 this = parse_method() 6903 6904 while self._match_set(expressions): 6905 this = self.expression( 6906 expressions[self._prev.token_type], 6907 this=this, 6908 comments=self._prev_comments, 6909 expression=parse_method(), 6910 ) 6911 6912 return this 6913 6914 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6915 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6916 6917 def _parse_wrapped_csv( 6918 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6919 ) -> t.List[exp.Expression]: 6920 return self._parse_wrapped( 6921 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6922 ) 6923 6924 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6925 wrapped = self._match(TokenType.L_PAREN) 6926 if not wrapped and not optional: 6927 self.raise_error("Expecting (") 6928 parse_result = parse_method() 6929 if wrapped: 6930 self._match_r_paren() 6931 return parse_result 6932 6933 def _parse_expressions(self) -> t.List[exp.Expression]: 6934 return self._parse_csv(self._parse_expression) 6935 6936 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6937 return self._parse_select() or self._parse_set_operations( 6938 self._parse_alias(self._parse_assignment(), explicit=True) 6939 if alias 6940 else self._parse_assignment() 6941 ) 6942 6943 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6944 return self._parse_query_modifiers( 6945 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6946 ) 6947 6948 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6949 this = None 6950 if self._match_texts(self.TRANSACTION_KIND): 6951 this = self._prev.text 6952 6953 self._match_texts(("TRANSACTION", "WORK")) 6954 6955 modes = [] 6956 while True: 6957 mode = [] 6958 while self._match(TokenType.VAR): 6959 mode.append(self._prev.text) 6960 6961 if mode: 6962 modes.append(" ".join(mode)) 6963 if not self._match(TokenType.COMMA): 6964 break 6965 6966 return self.expression(exp.Transaction, this=this, modes=modes) 6967 6968 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6969 chain = None 6970 savepoint = None 6971 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6972 6973 self._match_texts(("TRANSACTION", "WORK")) 6974 6975 if self._match_text_seq("TO"): 6976 self._match_text_seq("SAVEPOINT") 6977 savepoint = self._parse_id_var() 
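# Given the matching below: COMMIT AND CHAIN yields exp.Commit(chain=True),
# COMMIT AND NO CHAIN yields chain=False, and ROLLBACK [TO SAVEPOINT s]
# carries only the savepoint name.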
6978 6979 if self._match(TokenType.AND): 6980 chain = not self._match_text_seq("NO") 6981 self._match_text_seq("CHAIN") 6982 6983 if is_rollback: 6984 return self.expression(exp.Rollback, savepoint=savepoint) 6985 6986 return self.expression(exp.Commit, chain=chain) 6987 6988 def _parse_refresh(self) -> exp.Refresh: 6989 self._match(TokenType.TABLE) 6990 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 6991 6992 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6993 if not self._match_text_seq("ADD"): 6994 return None 6995 6996 self._match(TokenType.COLUMN) 6997 exists_column = self._parse_exists(not_=True) 6998 expression = self._parse_field_def() 6999 7000 if expression: 7001 expression.set("exists", exists_column) 7002 7003 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7004 if self._match_texts(("FIRST", "AFTER")): 7005 position = self._prev.text 7006 column_position = self.expression( 7007 exp.ColumnPosition, this=self._parse_column(), position=position 7008 ) 7009 expression.set("position", column_position) 7010 7011 return expression 7012 7013 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7014 drop = self._match(TokenType.DROP) and self._parse_drop() 7015 if drop and not isinstance(drop, exp.Command): 7016 drop.set("kind", drop.args.get("kind", "COLUMN")) 7017 return drop 7018 7019 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7020 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7021 return self.expression( 7022 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7023 ) 7024 7025 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7026 index = self._index - 1 7027 7028 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7029 return self._parse_csv( 7030 lambda: self.expression( 7031 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7032 ) 7033 ) 7034 7035 self._retreat(index) 7036 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 7037 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 7038 7039 if self._match_text_seq("ADD", "COLUMNS"): 7040 schema = self._parse_schema() 7041 if schema: 7042 return [schema] 7043 return [] 7044 7045 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 7046 7047 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7048 if self._match_texts(self.ALTER_ALTER_PARSERS): 7049 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7050 7051 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7052 # keyword after ALTER we default to parsing this statement 7053 self._match(TokenType.COLUMN) 7054 column = self._parse_field(any_token=True) 7055 7056 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7057 return self.expression(exp.AlterColumn, this=column, drop=True) 7058 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7059 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7060 if self._match(TokenType.COMMENT): 7061 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7062 if self._match_text_seq("DROP", "NOT", "NULL"): 7063 return self.expression( 7064 exp.AlterColumn, 7065 this=column, 7066 drop=True, 7067 allow_null=True, 7068 ) 7069 if self._match_text_seq("SET", "NOT", "NULL"): 7070 return self.expression( 7071 
exp.AlterColumn, 7072 this=column, 7073 allow_null=False, 7074 ) 7075 self._match_text_seq("SET", "DATA") 7076 self._match_text_seq("TYPE") 7077 return self.expression( 7078 exp.AlterColumn, 7079 this=column, 7080 dtype=self._parse_types(), 7081 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7082 using=self._match(TokenType.USING) and self._parse_assignment(), 7083 ) 7084 7085 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7086 if self._match_texts(("ALL", "EVEN", "AUTO")): 7087 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7088 7089 self._match_text_seq("KEY", "DISTKEY") 7090 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7091 7092 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7093 if compound: 7094 self._match_text_seq("SORTKEY") 7095 7096 if self._match(TokenType.L_PAREN, advance=False): 7097 return self.expression( 7098 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7099 ) 7100 7101 self._match_texts(("AUTO", "NONE")) 7102 return self.expression( 7103 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7104 ) 7105 7106 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7107 index = self._index - 1 7108 7109 partition_exists = self._parse_exists() 7110 if self._match(TokenType.PARTITION, advance=False): 7111 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7112 7113 self._retreat(index) 7114 return self._parse_csv(self._parse_drop_column) 7115 7116 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7117 if self._match(TokenType.COLUMN): 7118 exists = self._parse_exists() 7119 old_column = self._parse_column() 7120 to = self._match_text_seq("TO") 7121 new_column = self._parse_column() 7122 7123 if old_column is None or to is None or new_column is None: 7124 return None 7125 7126 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7127 7128 self._match_text_seq("TO") 7129 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7130 7131 def _parse_alter_table_set(self) -> exp.AlterSet: 7132 alter_set = self.expression(exp.AlterSet) 7133 7134 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7135 "TABLE", "PROPERTIES" 7136 ): 7137 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7138 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7139 alter_set.set("expressions", [self._parse_assignment()]) 7140 elif self._match_texts(("LOGGED", "UNLOGGED")): 7141 alter_set.set("option", exp.var(self._prev.text.upper())) 7142 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7143 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7144 elif self._match_text_seq("LOCATION"): 7145 alter_set.set("location", self._parse_field()) 7146 elif self._match_text_seq("ACCESS", "METHOD"): 7147 alter_set.set("access_method", self._parse_field()) 7148 elif self._match_text_seq("TABLESPACE"): 7149 alter_set.set("tablespace", self._parse_field()) 7150 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7151 alter_set.set("file_format", [self._parse_field()]) 7152 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7153 alter_set.set("file_format", self._parse_wrapped_options()) 7154 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7155 alter_set.set("copy_options", 
self._parse_wrapped_options()) 7156 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7157 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7158 else: 7159 if self._match_text_seq("SERDE"): 7160 alter_set.set("serde", self._parse_field()) 7161 7162 alter_set.set("expressions", [self._parse_properties()]) 7163 7164 return alter_set 7165 7166 def _parse_alter(self) -> exp.Alter | exp.Command: 7167 start = self._prev 7168 7169 alter_token = self._match_set(self.ALTERABLES) and self._prev 7170 if not alter_token: 7171 return self._parse_as_command(start) 7172 7173 exists = self._parse_exists() 7174 only = self._match_text_seq("ONLY") 7175 this = self._parse_table(schema=True) 7176 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7177 7178 if self._next: 7179 self._advance() 7180 7181 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7182 if parser: 7183 actions = ensure_list(parser(self)) 7184 not_valid = self._match_text_seq("NOT", "VALID") 7185 options = self._parse_csv(self._parse_property) 7186 7187 if not self._curr and actions: 7188 return self.expression( 7189 exp.Alter, 7190 this=this, 7191 kind=alter_token.text.upper(), 7192 exists=exists, 7193 actions=actions, 7194 only=only, 7195 options=options, 7196 cluster=cluster, 7197 not_valid=not_valid, 7198 ) 7199 7200 return self._parse_as_command(start) 7201 7202 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7203 start = self._prev 7204 # https://duckdb.org/docs/sql/statements/analyze 7205 if not self._curr: 7206 return self.expression(exp.Analyze) 7207 7208 options = [] 7209 while self._match_texts(self.ANALYZE_STYLES): 7210 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7211 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7212 else: 7213 options.append(self._prev.text.upper()) 7214 7215 this: t.Optional[exp.Expression] = None 7216 inner_expression: t.Optional[exp.Expression] = None 7217 7218 kind = self._curr and self._curr.text.upper() 7219 7220 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7221 this = self._parse_table_parts() 7222 elif self._match_text_seq("TABLES"): 7223 if self._match_set((TokenType.FROM, TokenType.IN)): 7224 kind = f"{kind} {self._prev.text.upper()}" 7225 this = self._parse_table(schema=True, is_db_reference=True) 7226 elif self._match_text_seq("DATABASE"): 7227 this = self._parse_table(schema=True, is_db_reference=True) 7228 elif self._match_text_seq("CLUSTER"): 7229 this = self._parse_table() 7230 # Try matching inner expression keywords before falling back to parsing a table.
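# For instance, ANALYZE TABLE t takes the first branch with kind="TABLE",
# while a bare ANALYZE t (Presto-style) reaches the final fallback with
# kind=None and the table parsed via _parse_table_parts.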
7231 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7232 kind = None 7233 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7234 else: 7235 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7236 kind = None 7237 this = self._parse_table_parts() 7238 7239 partition = self._try_parse(self._parse_partition) 7240 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7241 return self._parse_as_command(start) 7242 7243 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7244 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7245 "WITH", "ASYNC", "MODE" 7246 ): 7247 mode = f"WITH {self._tokens[self._index-2].text.upper()} MODE" 7248 else: 7249 mode = None 7250 7251 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7252 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7253 7254 properties = self._parse_properties() 7255 return self.expression( 7256 exp.Analyze, 7257 kind=kind, 7258 this=this, 7259 mode=mode, 7260 partition=partition, 7261 properties=properties, 7262 expression=inner_expression, 7263 options=options, 7264 ) 7265 7266 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7267 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7268 this = None 7269 kind = self._prev.text.upper() 7270 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7271 expressions = [] 7272 7273 if not self._match_text_seq("STATISTICS"): 7274 self.raise_error("Expecting token STATISTICS") 7275 7276 if self._match_text_seq("NOSCAN"): 7277 this = "NOSCAN" 7278 elif self._match(TokenType.FOR): 7279 if self._match_text_seq("ALL", "COLUMNS"): 7280 this = "FOR ALL COLUMNS" 7281 if self._match_texts("COLUMNS"): 7282 this = "FOR COLUMNS" 7283 expressions = self._parse_csv(self._parse_column_reference) 7284 elif self._match_text_seq("SAMPLE"): 7285 sample = self._parse_number() 7286 expressions = [ 7287 self.expression( 7288 exp.AnalyzeSample, 7289 sample=sample, 7290 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7291 ) 7292 ] 7293 7294 return self.expression( 7295 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7296 ) 7297 7298 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7299 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7300 kind = None 7301 this = None 7302 expression: t.Optional[exp.Expression] = None 7303 if self._match_text_seq("REF", "UPDATE"): 7304 kind = "REF" 7305 this = "UPDATE" 7306 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7307 this = "UPDATE SET DANGLING TO NULL" 7308 elif self._match_text_seq("STRUCTURE"): 7309 kind = "STRUCTURE" 7310 if self._match_text_seq("CASCADE", "FAST"): 7311 this = "CASCADE FAST" 7312 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7313 ("ONLINE", "OFFLINE") 7314 ): 7315 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7316 expression = self._parse_into() 7317 7318 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7319 7320 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7321 this = self._prev.text.upper() 7322 if self._match_text_seq("COLUMNS"): 7323 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7324 return None 7325 7326 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7327 kind = 
self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7328 if self._match_text_seq("STATISTICS"): 7329 return self.expression(exp.AnalyzeDelete, kind=kind) 7330 return None 7331 7332 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7333 if self._match_text_seq("CHAINED", "ROWS"): 7334 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7335 return None 7336 7337 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7338 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7339 this = self._prev.text.upper() 7340 expression: t.Optional[exp.Expression] = None 7341 expressions = [] 7342 update_options = None 7343 7344 if self._match_text_seq("HISTOGRAM", "ON"): 7345 expressions = self._parse_csv(self._parse_column_reference) 7346 with_expressions = [] 7347 while self._match(TokenType.WITH): 7348 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7349 if self._match_texts(("SYNC", "ASYNC")): 7350 if self._match_text_seq("MODE", advance=False): 7351 with_expressions.append(f"{self._prev.text.upper()} MODE") 7352 self._advance() 7353 else: 7354 buckets = self._parse_number() 7355 if self._match_text_seq("BUCKETS"): 7356 with_expressions.append(f"{buckets} BUCKETS") 7357 if with_expressions: 7358 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7359 7360 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7361 TokenType.UPDATE, advance=False 7362 ): 7363 update_options = self._prev.text.upper() 7364 self._advance() 7365 elif self._match_text_seq("USING", "DATA"): 7366 expression = self.expression(exp.UsingData, this=self._parse_string()) 7367 7368 return self.expression( 7369 exp.AnalyzeHistogram, 7370 this=this, 7371 expressions=expressions, 7372 expression=expression, 7373 update_options=update_options, 7374 ) 7375 7376 def _parse_merge(self) -> exp.Merge: 7377 self._match(TokenType.INTO) 7378 target = self._parse_table() 7379 7380 if target and self._match(TokenType.ALIAS, advance=False): 7381 target.set("alias", self._parse_table_alias()) 7382 7383 self._match(TokenType.USING) 7384 using = self._parse_table() 7385 7386 self._match(TokenType.ON) 7387 on = self._parse_assignment() 7388 7389 return self.expression( 7390 exp.Merge, 7391 this=target, 7392 using=using, 7393 on=on, 7394 whens=self._parse_when_matched(), 7395 returning=self._parse_returning(), 7396 ) 7397 7398 def _parse_when_matched(self) -> exp.Whens: 7399 whens = [] 7400 7401 while self._match(TokenType.WHEN): 7402 matched = not self._match(TokenType.NOT) 7403 self._match_text_seq("MATCHED") 7404 source = ( 7405 False 7406 if self._match_text_seq("BY", "TARGET") 7407 else self._match_text_seq("BY", "SOURCE") 7408 ) 7409 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7410 7411 self._match(TokenType.THEN) 7412 7413 if self._match(TokenType.INSERT): 7414 this = self._parse_star() 7415 if this: 7416 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7417 else: 7418 then = self.expression( 7419 exp.Insert, 7420 this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(), 7421 expression=self._match_text_seq("VALUES") and self._parse_value(), 7422 ) 7423 elif self._match(TokenType.UPDATE): 7424 expressions = self._parse_star() 7425 if expressions: 7426 then = self.expression(exp.Update, expressions=expressions) 7427 else: 7428 then = self.expression( 7429 exp.Update, 7430 expressions=self._match(TokenType.SET) 7431 and 
                    self._parse_csv(self._parse_equality),
                )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = self._parse_var_from_options(self.CONFLICT_ACTIONS)

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return self.expression(exp.Whens, expressions=whens)

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        if isinstance(right, (exp.Column, exp.Identifier)):
            right = exp.var(right.name)

        this = self.expression(exp.EQ, this=left, expression=right)
        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()
                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if (
            self._curr
            and self._curr.token_type != TokenType.STRING
            and self._curr.text.upper() in texts
        ):
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if (
                self._curr
                and self._curr.token_type != TokenType.STRING
                and self._curr.text.upper() == text
            ):
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)

    def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]:
        self._match(TokenType.EQ)
        self._match(TokenType.L_PAREN)

        opts: t.List[t.Optional[exp.Expression]] = []
        while self._curr and not self._match(TokenType.R_PAREN):
            if self._match_text_seq("FORMAT_NAME", "="):
                # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL,
                # so we parse it separately to use _parse_field()
                prop = self.expression(
                    exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field()
                )
                opts.append(prop)
            else:
                opts.append(self._parse_property())

            self._match(TokenType.COMMA)

        return opts

    def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
        sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

        options = []
        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
            option = self._parse_var(any_token=True)
            prev = self._prev.text.upper()

            # Different dialects might separate options and values by white space, "=" and "AS"
            self._match(TokenType.EQ)
            self._match(TokenType.ALIAS)

            param = self.expression(exp.CopyParameter, this=option)

            if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
                TokenType.L_PAREN, advance=False
            ):
                # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
                param.set("expressions", self._parse_wrapped_options())
            elif prev == "FILE_FORMAT":
                # T-SQL's external file format case
                param.set("expression", self._parse_field())
            else:
                param.set("expression", self._parse_unquoted_field())

            options.append(param)
            self._match(sep)

        return options

    def _parse_credentials(self) -> t.Optional[exp.Credentials]:
        expr = self.expression(exp.Credentials)

        if self._match_text_seq("STORAGE_INTEGRATION", "="):
            expr.set("storage", self._parse_field())
        if self._match_text_seq("CREDENTIALS"):
            # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
            creds = (
                self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
            )
            expr.set("credentials", creds)
        if self._match_text_seq("ENCRYPTION"):
            expr.set("encryption", self._parse_wrapped_options())
        if self._match_text_seq("IAM_ROLE"):
            expr.set("iam_role", self._parse_field())
        if self._match_text_seq("REGION"):
            expr.set("region", self._parse_field())

        return expr

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_copy(self) -> exp.Copy | exp.Command:
        start = self._prev

        self._match(TokenType.INTO)

        this = (
            self._parse_select(nested=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table(schema=True)
        )

        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_file_location)
        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )

    def _parse_normalize(self) -> exp.Normalize:
        return self.expression(
            exp.Normalize,
            this=self._parse_bitwise(),
            form=self._match(TokenType.COMMA) and self._parse_var(),
        )

    def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor:
        args = self._parse_csv(lambda: self._parse_lambda())

        this = seq_get(args, 0)
        decimals = seq_get(args, 1)

        return expr_type(
            this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var()
        )

    def _parse_star_ops(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COLUMNS", "(", advance=False):
            this = self._parse_function()
            if isinstance(this, exp.Columns):
                this.set("unpack", True)
            return this

        return self.expression(
            exp.Star,
            **{  # type: ignore
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        )

    def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]:
        privilege_parts = []

        # Keep consuming consecutive keywords until comma (end of this privilege) or ON
        # (end of privilege list) or L_PAREN (start of column list) are met
        while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False):
            privilege_parts.append(self._curr.text.upper())
            self._advance()

        this = exp.var(" ".join(privilege_parts))
        expressions = (
            self._parse_wrapped_csv(self._parse_column)
            if self._match(TokenType.L_PAREN, advance=False)
            else None
        )

        return self.expression(exp.GrantPrivilege, this=this, expressions=expressions)

    def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]:
        kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper()
        principal = self._parse_id_var()

        if not principal:
            return None

        return self.expression(exp.GrantPrincipal, this=principal, kind=kind)

    def _parse_grant(self) -> exp.Grant | exp.Command:
        start = self._prev

        privileges = self._parse_csv(self._parse_grant_privilege)

        self._match(TokenType.ON)
        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()

        # Attempt to parse the securable e.g. MySQL allows names
        # such as "foo.*", "*.*" which are not easily parseable yet
        securable = self._try_parse(self._parse_table_parts)

        if not securable or not self._match_text_seq("TO"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        grant_option = self._match_text_seq("WITH", "GRANT", "OPTION")

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Grant,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
        )

    def _parse_overlay(self) -> exp.Overlay:
        return self.expression(
            exp.Overlay,
            **{  # type: ignore
                "this": self._parse_bitwise(),
                "expression": self._match_text_seq("PLACING") and self._parse_bitwise(),
                "from": self._match_text_seq("FROM") and self._parse_bitwise(),
                "for": self._match_text_seq("FOR") and self._parse_bitwise(),
            },
        )
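Several of the statement parsers above share a defensive idiom: if any tokens remain after a best-effort parse (see the "Fallback case" checks in _parse_truncate_table, _parse_copy and _parse_grant), the parser retreats and re-emits the whole statement as an opaque exp.Command via _parse_as_command, preserving the raw SQL text. A minimal sketch of how this surfaces through the API; the sample GRANT statement is an assumption, not taken from the module:

from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "GRANT SELECT ON TABLE tbl TO admin"
node = Parser().parse(Tokenizer().tokenize(sql), sql=sql)[0]

# A fully understood GRANT becomes a structured exp.Grant; any statement with
# leftover tokens is instead wrapped verbatim in an exp.Command.
print(isinstance(node, (exp.Grant, exp.Command)))  # True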
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
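For illustration, a minimal end-to-end sketch of using the class; the sample query is an assumption, and Tokenizer comes from sqlglot.tokens:

from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM t"
tokens = Tokenizer().tokenize(sql)

expressions = Parser().parse(tokens, sql=sql)  # one syntax tree per statement
assert isinstance(expressions[0], exp.Select)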
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()
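A sketch of the constructor options; the dialect may be passed by name and is resolved through Dialect.get_or_raise, and the values below are arbitrary examples:

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

# Collect errors and log them instead of raising on the first one.
lenient = Parser(error_level=ErrorLevel.WARN, error_message_context=50, max_errors=5)

# Dialects can be passed by name instead of as a Dialect instance.
duckdb_parser = Parser(dialect="duckdb")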
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
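One tree is produced per semicolon-separated statement. A small sketch, with the sample statements being assumptions:

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT 1; SELECT 2"
trees = Parser().parse(Tokenizer().tokenize(sql), sql=sql)

print(len(trees))      # 2, one tree per statement
print(trees[0].sql())  # SELECT 1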
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
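A sketch of the ordered fallback when a collection of types is passed; it assumes exp.Where and exp.Expression are both registered in EXPRESSION_PARSERS:

from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "WHERE x > 1"
tokens = Tokenizer().tokenize(sql)

# Tries each type in order and stops at the first successful parse.
node = Parser().parse_into((exp.Where, exp.Expression), tokens, sql=sql)[0]
assert isinstance(node, exp.Where)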
    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
Logs or raises any found errors, depending on the chosen error level setting.
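A contrived sketch of the aggregation behavior: under ErrorLevel.RAISE, raise_error (below) only records errors, and check_errors is what finally raises, merging the collected messages into a single ParseError:

from sqlglot.errors import ErrorLevel, ParseError
from sqlglot.parser import Parser

parser = Parser(error_level=ErrorLevel.RAISE)
parser.raise_error("Unexpected token")  # recorded, not raised, under RAISE
parser.raise_error("Another problem")

try:
    parser.check_errors()
except ParseError as e:
    print(len(e.errors))  # 2: both recorded errors are merged into one ParseError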
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error to the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
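Together with raise_error, this method is the main hook for dialect authors extending the parser. A hypothetical sketch; the ISDISTINCT syntax and MyParser are invented for illustration:

from sqlglot import exp
from sqlglot.parser import Parser

class MyParser(Parser):
    # Hypothetical helper: parse "<column> ISDISTINCT <column>" into a NullSafeEQ node.
    def _parse_is_distinct(self):
        left = self._parse_column()
        if not self._match_text_seq("ISDISTINCT"):
            self.raise_error("Expected ISDISTINCT")
        right = self._parse_column()
        # expression() attaches pending comments and validates mandatory args.
        return self.expression(exp.NullSafeEQ, this=left, expression=right)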
    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
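A sketch of a failed validation; exp.EQ is used as an arbitrary example of a node whose `this` and `expression` arguments are both mandatory:

from sqlglot import exp
from sqlglot.errors import ParseError
from sqlglot.parser import Parser

parser = Parser()  # ErrorLevel.IMMEDIATE by default, so raise_error raises

try:
    parser.validate_expression(exp.EQ(this=exp.column("a")))  # missing `expression`
except ParseError as e:
    print(e.errors[0]["description"])  # Required keyword: 'expression' missing ...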