sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")
    TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor)

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)
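
# Illustrative sketch (not part of the module): exercising build_mod directly shows
# the operand wrapping described in the comment above.
#
#     from sqlglot import exp
#     from sqlglot.parser import build_mod
#
#     # MOD(a + 1, 7): the first operand is a binary Add node, so it is parenthesized.
#     node = build_mod(
#         [exp.Add(this=exp.column("a"), expression=exp.Literal.number(1)), exp.Literal.number(7)]
#     )
#     print(node.sql())  # expected: (a + 1) % 7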

def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True):
    return exp.Trim(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position="LEADING" if is_left else "TRAILING",
    )


def build_coalesce(args: t.List, is_nvl: t.Optional[bool] = None) -> exp.Coalesce:
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl)


def build_locate_strposition(args: t.List):
    return exp.StrPosition(
        this=seq_get(args, 1),
        substr=seq_get(args, 0),
        position=seq_get(args, 2),
    )


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass
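
# Illustrative sketch (not part of the module): the metaclass above precomputes word
# tries so multi-word SHOW/SET commands can be matched token by token. Assuming the
# layout used by sqlglot.trie, where a terminal entry is marked with the key 0:
#
#     from sqlglot.trie import TrieResult, in_trie, new_trie
#
#     trie = new_trie([("SHOW", "TABLES"), ("SHOW", "COLUMNS")])
#     result, _ = in_trie(trie, ("SHOW",))
#     print(result is TrieResult.PREFIX)  # "SHOW" alone is only a prefix
#     result, _ = in_trie(trie, ("SHOW", "TABLES"))
#     print(result is TrieResult.EXISTS)  # a complete registered command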

class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """
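
    # Illustrative usage (not part of the module): tokenize SQL, then hand the tokens
    # to a Parser instance. Most callers go through sqlglot.parse_one instead.
    #
    #     from sqlglot.parser import Parser
    #     from sqlglot.tokens import Tokenizer
    #
    #     tokens = Tokenizer().tokenize("SELECT a FROM b")
    #     ast = Parser().parse(tokens)[0]  # parse() returns one tree per statement
    #     print(ast.sql())                 # SELECT a FROM b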

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "CHAR": lambda args: exp.Chr(expressions=args),
        "CHR": lambda args: exp.Chr(expressions=args),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "STRPOS": exp.StrPosition.from_arg_list,
        "CHARINDEX": lambda args: build_locate_strposition(args),
        "INSTR": exp.StrPosition.from_arg_list,
        "LOCATE": lambda args: build_locate_strposition(args),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }
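
    # Illustrative usage (not part of the module): FUNCTIONS maps a function name to a
    # builder, so COALESCE, IFNULL and NVL all produce the same node type.
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     node = sqlglot.parse_one("IFNULL(a, b)")
    #     print(isinstance(node, exp.Coalesce))  # True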

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.DYNAMIC,
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.UDOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.BLOB,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME2,
        TokenType.DATETIME64,
        TokenType.SMALLDATETIME,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.DECIMAL256,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.POINT,
        TokenType.RING,
        TokenType.LINESTRING,
        TokenType.MULTILINESTRING,
        TokenType.POLYGON,
        TokenType.MULTIPOLYGON,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        TokenType.DYNAMIC,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
        TokenType.DOUBLE: TokenType.UDOUBLE,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }
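
    # Illustrative usage (not part of the module): NO_PAREN_FUNCTIONS lets bare
    # keywords parse as function calls without parentheses.
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     node = sqlglot.parse_one("SELECT CURRENT_DATE").expressions[0]
    #     print(isinstance(node, exp.CurrentDate))  # True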

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.MODEL,
        TokenType.NAMESPACE,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.SINK,
        TokenType.SOURCE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.STREAMLIT,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.ATTACH,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.CURRENT_SCHEMA,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DETACH,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.EXPORT,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LIMIT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.PUT,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
    ID_VAR_TOKENS.remove(TokenType.UNION)

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_SCHEMA,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }
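
    # Illustrative usage (not part of the module): TABLE_ALIAS_TOKENS drops join
    # keywords such as LEFT, so "FROM a LEFT JOIN b" is parsed as a join rather than
    # as table "a" aliased to "LEFT".
    #
    #     import sqlglot
    #
    #     ast = sqlglot.parse_one("SELECT * FROM a LEFT JOIN b ON a.id = b.id")
    #     print(ast.args["joins"][0].side)  # LEFT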

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPNTZ,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }
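
    # Illustrative usage (not part of the module): these operator tables drive a
    # classic precedence-climbing parse, so * binds tighter than +.
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     node = sqlglot.parse_one("1 + 2 * 3")
    #     print(isinstance(node, exp.Add), isinstance(node.expression, exp.Mul))  # True True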

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DOTCOLON: lambda self, this, to: self.expression(
            exp.JSONCast,
            this=this,
            to=to,
        ),
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Hint: lambda self: self._parse_hint_body(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Tuple: lambda self: self._parse_value(),
        exp.Whens: lambda self: self._parse_when_matched(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }
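
    # Illustrative usage (not part of the module): COLUMN_OPERATORS handles postfix
    # column operators such as :: casts and -> JSON extraction.
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     print(isinstance(sqlglot.parse_one("x::INT"), exp.Cast))  # True (STRICT_CAST)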

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.ANALYZE: lambda self: self._parse_analyze(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self._parse_use(),
        TokenType.SEMICOLON: lambda self: exp.Semicolon(),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(
            exp.HexString,
            this=token.text,
            is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None,
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
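
    # Illustrative usage (not part of the module): STATEMENT_PARSERS dispatches on the
    # first token of each statement; sqlglot.parse returns one tree per statement.
    #
    #     import sqlglot
    #
    #     trees = sqlglot.parse("CREATE TABLE t (a INT); INSERT INTO t VALUES (1)")
    #     print(len(trees))  # 2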

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
        "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EMPTY": lambda self: self.expression(exp.EmptyProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SECURITY": lambda self: self._parse_security(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
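
    # Illustrative usage (not part of the module): PROPERTY_PARSERS picks up properties
    # in DDL, e.g. the ENGINE assignment below.
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     ast = sqlglot.parse_one("CREATE TABLE t (a INT) ENGINE=InnoDB", read="mysql")
    #     props = ast.args["properties"]
    #     print(any(isinstance(p, exp.EngineProperty) for p in props.expressions))  # True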

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WATERMARK": lambda self: self.expression(
            exp.WatermarkColumnConstraint,
            this=self._match(TokenType.FOR) and self._parse_column(),
            expression=self._match(TokenType.ALIAS) and self._parse_disjunction(),
        ),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }
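
    # Illustrative usage (not part of the module): CONSTRAINT_PARSERS handles
    # column-level constraints inside a schema definition.
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     ast = sqlglot.parse_one("CREATE TABLE t (a INT PRIMARY KEY)")
    #     col = ast.this.expressions[0]  # the column definition for "a"
    #     print(
    #         any(
    #             isinstance(c.args.get("kind"), exp.PrimaryKeyColumnConstraint)
    #             for c in col.args.get("constraints", [])
    #         )
    #     )  # True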

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "SWAP": lambda self: self.expression(
            exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
        ),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
        "WATERMARK",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "NORMALIZE": lambda self: self._parse_normalize(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "OVERLAY": lambda self: self._parse_overlay(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
        "XMLELEMENT": lambda self: self.expression(
            exp.XMLElement,
            this=self._match_text_seq("NAME") and self._parse_id_var(),
            expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression),
        ),
        "XMLTABLE": lambda self: self._parse_xml_table(),
    }
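
    # Illustrative usage (not part of the module): FUNCTION_PARSERS covers functions
    # with non-standard call syntax, such as CAST(x AS type).
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     print(isinstance(sqlglot.parse_one("TRY_CAST(x AS INT)"), exp.TryCast))  # True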

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))
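
    # Illustrative usage (not part of the module): each query modifier lands in the
    # args of the parsed SELECT under the key returned by its parser above.
    #
    #     import sqlglot
    #
    #     ast = sqlglot.parse_one("SELECT * FROM t WHERE x > 1 LIMIT 5")
    #     print(sorted(k for k in ("where", "limit") if k in ast.args))  # ['limit', 'where']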
dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1278 1279 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1280 "TYPE": ("EVOLUTION",), 1281 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1282 } 1283 1284 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1285 1286 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1287 1288 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1289 "NOT": ("ENFORCED",), 1290 "MATCH": ( 1291 "FULL", 1292 "PARTIAL", 1293 "SIMPLE", 1294 ), 1295 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1296 "USING": ( 1297 "BTREE", 1298 "HASH", 1299 ), 1300 **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()), 1301 } 1302 1303 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1304 1305 CLONE_KEYWORDS = {"CLONE", "COPY"} 1306 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1307 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1308 1309 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1310 1311 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1312 1313 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1314 1315 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1316 1317 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1318 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1319 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1320 1321 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1322 1323 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1324 1325 ADD_CONSTRAINT_TOKENS = { 1326 TokenType.CONSTRAINT, 1327 TokenType.FOREIGN_KEY, 1328 TokenType.INDEX, 1329 TokenType.KEY, 1330 TokenType.PRIMARY_KEY, 1331 TokenType.UNIQUE, 1332 } 1333 1334 DISTINCT_TOKENS = {TokenType.DISTINCT} 1335 1336 NULL_TOKENS = {TokenType.NULL} 1337 1338 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1339 1340 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1341 1342 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1343 1344 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1345 1346 ODBC_DATETIME_LITERALS = { 1347 "d": exp.Date, 1348 "t": exp.Time, 1349 "ts": exp.Timestamp, 1350 } 1351 1352 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1353 1354 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1355 1356 # The style options for the DESCRIBE statement 1357 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1358 1359 # The style options for the ANALYZE statement 1360 ANALYZE_STYLES = { 1361 "BUFFER_USAGE_LIMIT", 1362 "FULL", 1363 "LOCAL", 1364 "NO_WRITE_TO_BINLOG", 1365 "SAMPLE", 1366 "SKIP_LOCKED", 1367 "VERBOSE", 1368 } 1369 1370 ANALYZE_EXPRESSION_PARSERS = { 1371 "ALL": lambda self: self._parse_analyze_columns(), 1372 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1373 "DELETE": lambda self: self._parse_analyze_delete(), 1374 "DROP": lambda self: self._parse_analyze_histogram(), 1375 "ESTIMATE": lambda self: self._parse_analyze_statistics(), 1376 "LIST": lambda self: self._parse_analyze_list(), 1377 "PREDICATE": lambda self: self._parse_analyze_columns(), 1378 "UPDATE": lambda self: self._parse_analyze_histogram(), 1379 "VALIDATE": lambda self: self._parse_analyze_validate(), 1380 } 1381 1382 PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"} 1383 1384 AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET) 1385 1386 OPERATION_MODIFIERS: 

    OPERATION_MODIFIERS: t.Set[str] = set()

    RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"}

    MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows)

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = True
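
    # Illustrative sketch (not part of the module): dialects tune these class-level
    # flags by subclassing Parser inside a Dialect, e.g. to make LOG(x) mean LN(x).
    #
    #     from sqlglot.dialects.dialect import Dialect
    #     from sqlglot.parser import Parser
    #
    #     class MyDialect(Dialect):
    #         class Parser(Parser):
    #             LOG_DEFAULTS_TO_LN = True
    #
    #     print(MyDialect().parse("SELECT LOG(x)")[0].sql())  # SELECT LN(x)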

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
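
    # Illustrative usage (not part of the module): parse_into narrows parsing to a
    # specific expression type registered in EXPRESSION_PARSERS.
    #
    #     from sqlglot import exp
    #     from sqlglot.parser import Parser
    #     from sqlglot.tokens import Tokenizer
    #
    #     tokens = Tokenizer().tokenize("x = 1 AND y = 2")
    #     cond = Parser().parse_into(exp.Condition, tokens)[0]
    #     print(isinstance(cond, exp.And))  # True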

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if token.comments:
                    chunks.append([token])

                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
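
    # Illustrative usage (not part of the module): with ErrorLevel.RAISE, recorded
    # errors surface together as a single ParseError from check_errors; each entry
    # in e.errors carries the position and context captured by raise_error.
    #
    #     import sqlglot
    #     from sqlglot.errors import ErrorLevel, ParseError
    #
    #     try:
    #         sqlglot.parse_one("SELECT (1", error_level=ErrorLevel.RAISE)
    #     except ParseError as e:
    #         print(e.errors[0]["line"], e.errors[0]["col"])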

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )
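
    # Illustrative sketch (not part of the module): validate_expression relies on
    # Expression.error_messages to detect missing mandatory arguments; a Cast built
    # without its "to" type, for example, reports at least one error.
    #
    #     from sqlglot import exp
    #
    #     print(bool(exp.Cast(this=exp.column("x")).error_messages()))  # True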
1710 This behavior can be different depending on the uset-set ErrorLevel, so _try_parse aims to 1711 solve this by setting & resetting the parser state accordingly 1712 """ 1713 index = self._index 1714 error_level = self.error_level 1715 1716 self.error_level = ErrorLevel.IMMEDIATE 1717 try: 1718 this = parse_method() 1719 except ParseError: 1720 this = None 1721 finally: 1722 if not this or retreat: 1723 self._retreat(index) 1724 self.error_level = error_level 1725 1726 return this 1727 1728 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1729 start = self._prev 1730 exists = self._parse_exists() if allow_exists else None 1731 1732 self._match(TokenType.ON) 1733 1734 materialized = self._match_text_seq("MATERIALIZED") 1735 kind = self._match_set(self.CREATABLES) and self._prev 1736 if not kind: 1737 return self._parse_as_command(start) 1738 1739 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1740 this = self._parse_user_defined_function(kind=kind.token_type) 1741 elif kind.token_type == TokenType.TABLE: 1742 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1743 elif kind.token_type == TokenType.COLUMN: 1744 this = self._parse_column() 1745 else: 1746 this = self._parse_id_var() 1747 1748 self._match(TokenType.IS) 1749 1750 return self.expression( 1751 exp.Comment, 1752 this=this, 1753 kind=kind.text, 1754 expression=self._parse_string(), 1755 exists=exists, 1756 materialized=materialized, 1757 ) 1758 1759 def _parse_to_table( 1760 self, 1761 ) -> exp.ToTableProperty: 1762 table = self._parse_table_parts(schema=True) 1763 return self.expression(exp.ToTableProperty, this=table) 1764 1765 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1766 def _parse_ttl(self) -> exp.Expression: 1767 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1768 this = self._parse_bitwise() 1769 1770 if self._match_text_seq("DELETE"): 1771 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1772 if self._match_text_seq("RECOMPRESS"): 1773 return self.expression( 1774 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1775 ) 1776 if self._match_text_seq("TO", "DISK"): 1777 return self.expression( 1778 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1779 ) 1780 if self._match_text_seq("TO", "VOLUME"): 1781 return self.expression( 1782 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1783 ) 1784 1785 return this 1786 1787 expressions = self._parse_csv(_parse_ttl_action) 1788 where = self._parse_where() 1789 group = self._parse_group() 1790 1791 aggregates = None 1792 if group and self._match(TokenType.SET): 1793 aggregates = self._parse_csv(self._parse_set_item) 1794 1795 return self.expression( 1796 exp.MergeTreeTTL, 1797 expressions=expressions, 1798 where=where, 1799 group=group, 1800 aggregates=aggregates, 1801 ) 1802 1803 def _parse_statement(self) -> t.Optional[exp.Expression]: 1804 if self._curr is None: 1805 return None 1806 1807 if self._match_set(self.STATEMENT_PARSERS): 1808 comments = self._prev_comments 1809 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 1810 stmt.add_comments(comments, prepend=True) 1811 return stmt 1812 1813 if self._match_set(self.dialect.tokenizer.COMMANDS): 1814 return self._parse_command() 1815 1816 expression = self._parse_expression() 1817 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1818 return self._parse_query_modifiers(expression) 
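    # A minimal sketch of how this dispatch surfaces through the public API, assuming
    # the default dialect: a DROP statement is matched in STATEMENT_PARSERS and routed
    # to _parse_drop below.
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> stmt = sqlglot.parse_one("DROP TABLE IF EXISTS t")
    #   >>> isinstance(stmt, exp.Drop)
    #   True
    #   >>> stmt.args.get("exists")
    #   True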
1819 1820 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1821 start = self._prev 1822 temporary = self._match(TokenType.TEMPORARY) 1823 materialized = self._match_text_seq("MATERIALIZED") 1824 1825 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1826 if not kind: 1827 return self._parse_as_command(start) 1828 1829 concurrently = self._match_text_seq("CONCURRENTLY") 1830 if_exists = exists or self._parse_exists() 1831 1832 if kind == "COLUMN": 1833 this = self._parse_column() 1834 else: 1835 this = self._parse_table_parts( 1836 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1837 ) 1838 1839 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1840 1841 if self._match(TokenType.L_PAREN, advance=False): 1842 expressions = self._parse_wrapped_csv(self._parse_types) 1843 else: 1844 expressions = None 1845 1846 return self.expression( 1847 exp.Drop, 1848 exists=if_exists, 1849 this=this, 1850 expressions=expressions, 1851 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1852 temporary=temporary, 1853 materialized=materialized, 1854 cascade=self._match_text_seq("CASCADE"), 1855 constraints=self._match_text_seq("CONSTRAINTS"), 1856 purge=self._match_text_seq("PURGE"), 1857 cluster=cluster, 1858 concurrently=concurrently, 1859 ) 1860 1861 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1862 return ( 1863 self._match_text_seq("IF") 1864 and (not not_ or self._match(TokenType.NOT)) 1865 and self._match(TokenType.EXISTS) 1866 ) 1867 1868 def _parse_create(self) -> exp.Create | exp.Command: 1869 # Note: this can't be None because we've matched a statement parser 1870 start = self._prev 1871 1872 replace = ( 1873 start.token_type == TokenType.REPLACE 1874 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1875 or self._match_pair(TokenType.OR, TokenType.ALTER) 1876 ) 1877 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1878 1879 unique = self._match(TokenType.UNIQUE) 1880 1881 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1882 clustered = True 1883 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1884 "COLUMNSTORE" 1885 ): 1886 clustered = False 1887 else: 1888 clustered = None 1889 1890 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1891 self._advance() 1892 1893 properties = None 1894 create_token = self._match_set(self.CREATABLES) and self._prev 1895 1896 if not create_token: 1897 # exp.Properties.Location.POST_CREATE 1898 properties = self._parse_properties() 1899 create_token = self._match_set(self.CREATABLES) and self._prev 1900 1901 if not properties or not create_token: 1902 return self._parse_as_command(start) 1903 1904 concurrently = self._match_text_seq("CONCURRENTLY") 1905 exists = self._parse_exists(not_=True) 1906 this = None 1907 expression: t.Optional[exp.Expression] = None 1908 indexes = None 1909 no_schema_binding = None 1910 begin = None 1911 end = None 1912 clone = None 1913 1914 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1915 nonlocal properties 1916 if properties and temp_props: 1917 properties.expressions.extend(temp_props.expressions) 1918 elif temp_props: 1919 properties = temp_props 1920 1921 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1922 this = self._parse_user_defined_function(kind=create_token.token_type) 1923 1924 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1925 
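    # For example, in "CREATE FUNCTION f(x INT) RETURNS INT ..." the RETURNS clause and
    # any other options that follow the signature are parsed here and merged into
    # `properties` via extend_props.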
extend_props(self._parse_properties()) 1926 1927 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1928 extend_props(self._parse_properties()) 1929 1930 if not expression: 1931 if self._match(TokenType.COMMAND): 1932 expression = self._parse_as_command(self._prev) 1933 else: 1934 begin = self._match(TokenType.BEGIN) 1935 return_ = self._match_text_seq("RETURN") 1936 1937 if self._match(TokenType.STRING, advance=False): 1938 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1939 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1940 expression = self._parse_string() 1941 extend_props(self._parse_properties()) 1942 else: 1943 expression = self._parse_user_defined_function_expression() 1944 1945 end = self._match_text_seq("END") 1946 1947 if return_: 1948 expression = self.expression(exp.Return, this=expression) 1949 elif create_token.token_type == TokenType.INDEX: 1950 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1951 if not self._match(TokenType.ON): 1952 index = self._parse_id_var() 1953 anonymous = False 1954 else: 1955 index = None 1956 anonymous = True 1957 1958 this = self._parse_index(index=index, anonymous=anonymous) 1959 elif create_token.token_type in self.DB_CREATABLES: 1960 table_parts = self._parse_table_parts( 1961 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1962 ) 1963 1964 # exp.Properties.Location.POST_NAME 1965 self._match(TokenType.COMMA) 1966 extend_props(self._parse_properties(before=True)) 1967 1968 this = self._parse_schema(this=table_parts) 1969 1970 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1971 extend_props(self._parse_properties()) 1972 1973 self._match(TokenType.ALIAS) 1974 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1975 # exp.Properties.Location.POST_ALIAS 1976 extend_props(self._parse_properties()) 1977 1978 if create_token.token_type == TokenType.SEQUENCE: 1979 expression = self._parse_types() 1980 extend_props(self._parse_properties()) 1981 else: 1982 expression = self._parse_ddl_select() 1983 1984 if create_token.token_type == TokenType.TABLE: 1985 # exp.Properties.Location.POST_EXPRESSION 1986 extend_props(self._parse_properties()) 1987 1988 indexes = [] 1989 while True: 1990 index = self._parse_index() 1991 1992 # exp.Properties.Location.POST_INDEX 1993 extend_props(self._parse_properties()) 1994 if not index: 1995 break 1996 else: 1997 self._match(TokenType.COMMA) 1998 indexes.append(index) 1999 elif create_token.token_type == TokenType.VIEW: 2000 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2001 no_schema_binding = True 2002 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2003 extend_props(self._parse_properties()) 2004 2005 shallow = self._match_text_seq("SHALLOW") 2006 2007 if self._match_texts(self.CLONE_KEYWORDS): 2008 copy = self._prev.text.lower() == "copy" 2009 clone = self.expression( 2010 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2011 ) 2012 2013 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2014 return self._parse_as_command(start) 2015 2016 create_kind_text = create_token.text.upper() 2017 return self.expression( 2018 exp.Create, 2019 this=this, 2020 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2021 replace=replace, 2022 refresh=refresh, 2023 unique=unique, 2024 expression=expression, 
2025 exists=exists, 2026 properties=properties, 2027 indexes=indexes, 2028 no_schema_binding=no_schema_binding, 2029 begin=begin, 2030 end=end, 2031 clone=clone, 2032 concurrently=concurrently, 2033 clustered=clustered, 2034 ) 2035 2036 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2037 seq = exp.SequenceProperties() 2038 2039 options = [] 2040 index = self._index 2041 2042 while self._curr: 2043 self._match(TokenType.COMMA) 2044 if self._match_text_seq("INCREMENT"): 2045 self._match_text_seq("BY") 2046 self._match_text_seq("=") 2047 seq.set("increment", self._parse_term()) 2048 elif self._match_text_seq("MINVALUE"): 2049 seq.set("minvalue", self._parse_term()) 2050 elif self._match_text_seq("MAXVALUE"): 2051 seq.set("maxvalue", self._parse_term()) 2052 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2053 self._match_text_seq("=") 2054 seq.set("start", self._parse_term()) 2055 elif self._match_text_seq("CACHE"): 2056 # T-SQL allows empty CACHE which is initialized dynamically 2057 seq.set("cache", self._parse_number() or True) 2058 elif self._match_text_seq("OWNED", "BY"): 2059 # "OWNED BY NONE" is the default 2060 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2061 else: 2062 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2063 if opt: 2064 options.append(opt) 2065 else: 2066 break 2067 2068 seq.set("options", options if options else None) 2069 return None if self._index == index else seq 2070 2071 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2072 # only used for teradata currently 2073 self._match(TokenType.COMMA) 2074 2075 kwargs = { 2076 "no": self._match_text_seq("NO"), 2077 "dual": self._match_text_seq("DUAL"), 2078 "before": self._match_text_seq("BEFORE"), 2079 "default": self._match_text_seq("DEFAULT"), 2080 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2081 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2082 "after": self._match_text_seq("AFTER"), 2083 "minimum": self._match_texts(("MIN", "MINIMUM")), 2084 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2085 } 2086 2087 if self._match_texts(self.PROPERTY_PARSERS): 2088 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2089 try: 2090 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2091 except TypeError: 2092 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2093 2094 return None 2095 2096 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2097 return self._parse_wrapped_csv(self._parse_property) 2098 2099 def _parse_property(self) -> t.Optional[exp.Expression]: 2100 if self._match_texts(self.PROPERTY_PARSERS): 2101 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2102 2103 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2104 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2105 2106 if self._match_text_seq("COMPOUND", "SORTKEY"): 2107 return self._parse_sortkey(compound=True) 2108 2109 if self._match_text_seq("SQL", "SECURITY"): 2110 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2111 2112 index = self._index 2113 key = self._parse_column() 2114 2115 if not self._match(TokenType.EQ): 2116 self._retreat(index) 2117 return self._parse_sequence_properties() 2118 2119 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2120 if isinstance(key, exp.Column): 2121 key = 
key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2122 2123 value = self._parse_bitwise() or self._parse_var(any_token=True) 2124 2125 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2126 if isinstance(value, exp.Column): 2127 value = exp.var(value.name) 2128 2129 return self.expression(exp.Property, this=key, value=value) 2130 2131 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2132 if self._match_text_seq("BY"): 2133 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2134 2135 self._match(TokenType.ALIAS) 2136 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2137 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2138 2139 return self.expression( 2140 exp.FileFormatProperty, 2141 this=( 2142 self.expression( 2143 exp.InputOutputFormat, 2144 input_format=input_format, 2145 output_format=output_format, 2146 ) 2147 if input_format or output_format 2148 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2149 ), 2150 ) 2151 2152 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2153 field = self._parse_field() 2154 if isinstance(field, exp.Identifier) and not field.quoted: 2155 field = exp.var(field) 2156 2157 return field 2158 2159 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2160 self._match(TokenType.EQ) 2161 self._match(TokenType.ALIAS) 2162 2163 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2164 2165 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2166 properties = [] 2167 while True: 2168 if before: 2169 prop = self._parse_property_before() 2170 else: 2171 prop = self._parse_property() 2172 if not prop: 2173 break 2174 for p in ensure_list(prop): 2175 properties.append(p) 2176 2177 if properties: 2178 return self.expression(exp.Properties, expressions=properties) 2179 2180 return None 2181 2182 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2183 return self.expression( 2184 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2185 ) 2186 2187 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2188 if self._match_texts(("DEFINER", "INVOKER")): 2189 security_specifier = self._prev.text.upper() 2190 return self.expression(exp.SecurityProperty, this=security_specifier) 2191 return None 2192 2193 def _parse_settings_property(self) -> exp.SettingsProperty: 2194 return self.expression( 2195 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2196 ) 2197 2198 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2199 if self._index >= 2: 2200 pre_volatile_token = self._tokens[self._index - 2] 2201 else: 2202 pre_volatile_token = None 2203 2204 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2205 return exp.VolatileProperty() 2206 2207 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2208 2209 def _parse_retention_period(self) -> exp.Var: 2210 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2211 number = self._parse_number() 2212 number_str = f"{number} " if number else "" 2213 unit = self._parse_var(any_token=True) 2214 return exp.var(f"{number_str}{unit}") 2215 2216 def _parse_system_versioning_property( 2217 self, with_: bool = False 2218 ) 
-> exp.WithSystemVersioningProperty: 2219 self._match(TokenType.EQ) 2220 prop = self.expression( 2221 exp.WithSystemVersioningProperty, 2222 **{ # type: ignore 2223 "on": True, 2224 "with": with_, 2225 }, 2226 ) 2227 2228 if self._match_text_seq("OFF"): 2229 prop.set("on", False) 2230 return prop 2231 2232 self._match(TokenType.ON) 2233 if self._match(TokenType.L_PAREN): 2234 while self._curr and not self._match(TokenType.R_PAREN): 2235 if self._match_text_seq("HISTORY_TABLE", "="): 2236 prop.set("this", self._parse_table_parts()) 2237 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2238 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2239 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2240 prop.set("retention_period", self._parse_retention_period()) 2241 2242 self._match(TokenType.COMMA) 2243 2244 return prop 2245 2246 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2247 self._match(TokenType.EQ) 2248 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2249 prop = self.expression(exp.DataDeletionProperty, on=on) 2250 2251 if self._match(TokenType.L_PAREN): 2252 while self._curr and not self._match(TokenType.R_PAREN): 2253 if self._match_text_seq("FILTER_COLUMN", "="): 2254 prop.set("filter_column", self._parse_column()) 2255 elif self._match_text_seq("RETENTION_PERIOD", "="): 2256 prop.set("retention_period", self._parse_retention_period()) 2257 2258 self._match(TokenType.COMMA) 2259 2260 return prop 2261 2262 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2263 kind = "HASH" 2264 expressions: t.Optional[t.List[exp.Expression]] = None 2265 if self._match_text_seq("BY", "HASH"): 2266 expressions = self._parse_wrapped_csv(self._parse_id_var) 2267 elif self._match_text_seq("BY", "RANDOM"): 2268 kind = "RANDOM" 2269 2270 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2271 buckets: t.Optional[exp.Expression] = None 2272 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2273 buckets = self._parse_number() 2274 2275 return self.expression( 2276 exp.DistributedByProperty, 2277 expressions=expressions, 2278 kind=kind, 2279 buckets=buckets, 2280 order=self._parse_order(), 2281 ) 2282 2283 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2284 self._match_text_seq("KEY") 2285 expressions = self._parse_wrapped_id_vars() 2286 return self.expression(expr_type, expressions=expressions) 2287 2288 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2289 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2290 prop = self._parse_system_versioning_property(with_=True) 2291 self._match_r_paren() 2292 return prop 2293 2294 if self._match(TokenType.L_PAREN, advance=False): 2295 return self._parse_wrapped_properties() 2296 2297 if self._match_text_seq("JOURNAL"): 2298 return self._parse_withjournaltable() 2299 2300 if self._match_texts(self.VIEW_ATTRIBUTES): 2301 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2302 2303 if self._match_text_seq("DATA"): 2304 return self._parse_withdata(no=False) 2305 elif self._match_text_seq("NO", "DATA"): 2306 return self._parse_withdata(no=True) 2307 2308 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2309 return self._parse_serde_properties(with_=True) 2310 2311 if self._match(TokenType.SCHEMA): 2312 return self.expression( 2313 exp.WithSchemaBindingProperty, 2314 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 
2315 ) 2316 2317 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2318 return self.expression( 2319 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2320 ) 2321 2322 if not self._next: 2323 return None 2324 2325 return self._parse_withisolatedloading() 2326 2327 def _parse_procedure_option(self) -> exp.Expression | None: 2328 if self._match_text_seq("EXECUTE", "AS"): 2329 return self.expression( 2330 exp.ExecuteAsProperty, 2331 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2332 or self._parse_string(), 2333 ) 2334 2335 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2336 2337 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2338 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2339 self._match(TokenType.EQ) 2340 2341 user = self._parse_id_var() 2342 self._match(TokenType.PARAMETER) 2343 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2344 2345 if not user or not host: 2346 return None 2347 2348 return exp.DefinerProperty(this=f"{user}@{host}") 2349 2350 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2351 self._match(TokenType.TABLE) 2352 self._match(TokenType.EQ) 2353 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2354 2355 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2356 return self.expression(exp.LogProperty, no=no) 2357 2358 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2359 return self.expression(exp.JournalProperty, **kwargs) 2360 2361 def _parse_checksum(self) -> exp.ChecksumProperty: 2362 self._match(TokenType.EQ) 2363 2364 on = None 2365 if self._match(TokenType.ON): 2366 on = True 2367 elif self._match_text_seq("OFF"): 2368 on = False 2369 2370 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2371 2372 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2373 return self.expression( 2374 exp.Cluster, 2375 expressions=( 2376 self._parse_wrapped_csv(self._parse_ordered) 2377 if wrapped 2378 else self._parse_csv(self._parse_ordered) 2379 ), 2380 ) 2381 2382 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2383 self._match_text_seq("BY") 2384 2385 self._match_l_paren() 2386 expressions = self._parse_csv(self._parse_column) 2387 self._match_r_paren() 2388 2389 if self._match_text_seq("SORTED", "BY"): 2390 self._match_l_paren() 2391 sorted_by = self._parse_csv(self._parse_ordered) 2392 self._match_r_paren() 2393 else: 2394 sorted_by = None 2395 2396 self._match(TokenType.INTO) 2397 buckets = self._parse_number() 2398 self._match_text_seq("BUCKETS") 2399 2400 return self.expression( 2401 exp.ClusteredByProperty, 2402 expressions=expressions, 2403 sorted_by=sorted_by, 2404 buckets=buckets, 2405 ) 2406 2407 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2408 if not self._match_text_seq("GRANTS"): 2409 self._retreat(self._index - 1) 2410 return None 2411 2412 return self.expression(exp.CopyGrantsProperty) 2413 2414 def _parse_freespace(self) -> exp.FreespaceProperty: 2415 self._match(TokenType.EQ) 2416 return self.expression( 2417 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2418 ) 2419 2420 def _parse_mergeblockratio( 2421 self, no: bool = False, default: bool = False 2422 ) -> exp.MergeBlockRatioProperty: 2423 if self._match(TokenType.EQ): 2424 return self.expression( 2425 exp.MergeBlockRatioProperty, 2426 
this=self._parse_number(), 2427 percent=self._match(TokenType.PERCENT), 2428 ) 2429 2430 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2431 2432 def _parse_datablocksize( 2433 self, 2434 default: t.Optional[bool] = None, 2435 minimum: t.Optional[bool] = None, 2436 maximum: t.Optional[bool] = None, 2437 ) -> exp.DataBlocksizeProperty: 2438 self._match(TokenType.EQ) 2439 size = self._parse_number() 2440 2441 units = None 2442 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2443 units = self._prev.text 2444 2445 return self.expression( 2446 exp.DataBlocksizeProperty, 2447 size=size, 2448 units=units, 2449 default=default, 2450 minimum=minimum, 2451 maximum=maximum, 2452 ) 2453 2454 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2455 self._match(TokenType.EQ) 2456 always = self._match_text_seq("ALWAYS") 2457 manual = self._match_text_seq("MANUAL") 2458 never = self._match_text_seq("NEVER") 2459 default = self._match_text_seq("DEFAULT") 2460 2461 autotemp = None 2462 if self._match_text_seq("AUTOTEMP"): 2463 autotemp = self._parse_schema() 2464 2465 return self.expression( 2466 exp.BlockCompressionProperty, 2467 always=always, 2468 manual=manual, 2469 never=never, 2470 default=default, 2471 autotemp=autotemp, 2472 ) 2473 2474 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2475 index = self._index 2476 no = self._match_text_seq("NO") 2477 concurrent = self._match_text_seq("CONCURRENT") 2478 2479 if not self._match_text_seq("ISOLATED", "LOADING"): 2480 self._retreat(index) 2481 return None 2482 2483 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2484 return self.expression( 2485 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2486 ) 2487 2488 def _parse_locking(self) -> exp.LockingProperty: 2489 if self._match(TokenType.TABLE): 2490 kind = "TABLE" 2491 elif self._match(TokenType.VIEW): 2492 kind = "VIEW" 2493 elif self._match(TokenType.ROW): 2494 kind = "ROW" 2495 elif self._match_text_seq("DATABASE"): 2496 kind = "DATABASE" 2497 else: 2498 kind = None 2499 2500 if kind in ("DATABASE", "TABLE", "VIEW"): 2501 this = self._parse_table_parts() 2502 else: 2503 this = None 2504 2505 if self._match(TokenType.FOR): 2506 for_or_in = "FOR" 2507 elif self._match(TokenType.IN): 2508 for_or_in = "IN" 2509 else: 2510 for_or_in = None 2511 2512 if self._match_text_seq("ACCESS"): 2513 lock_type = "ACCESS" 2514 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2515 lock_type = "EXCLUSIVE" 2516 elif self._match_text_seq("SHARE"): 2517 lock_type = "SHARE" 2518 elif self._match_text_seq("READ"): 2519 lock_type = "READ" 2520 elif self._match_text_seq("WRITE"): 2521 lock_type = "WRITE" 2522 elif self._match_text_seq("CHECKSUM"): 2523 lock_type = "CHECKSUM" 2524 else: 2525 lock_type = None 2526 2527 override = self._match_text_seq("OVERRIDE") 2528 2529 return self.expression( 2530 exp.LockingProperty, 2531 this=this, 2532 kind=kind, 2533 for_or_in=for_or_in, 2534 lock_type=lock_type, 2535 override=override, 2536 ) 2537 2538 def _parse_partition_by(self) -> t.List[exp.Expression]: 2539 if self._match(TokenType.PARTITION_BY): 2540 return self._parse_csv(self._parse_assignment) 2541 return [] 2542 2543 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2544 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2545 if self._match_text_seq("MINVALUE"): 2546 return exp.var("MINVALUE") 2547 if self._match_text_seq("MAXVALUE"): 2548 return 
exp.var("MAXVALUE") 2549 return self._parse_bitwise() 2550 2551 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2552 expression = None 2553 from_expressions = None 2554 to_expressions = None 2555 2556 if self._match(TokenType.IN): 2557 this = self._parse_wrapped_csv(self._parse_bitwise) 2558 elif self._match(TokenType.FROM): 2559 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2560 self._match_text_seq("TO") 2561 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2562 elif self._match_text_seq("WITH", "(", "MODULUS"): 2563 this = self._parse_number() 2564 self._match_text_seq(",", "REMAINDER") 2565 expression = self._parse_number() 2566 self._match_r_paren() 2567 else: 2568 self.raise_error("Failed to parse partition bound spec.") 2569 2570 return self.expression( 2571 exp.PartitionBoundSpec, 2572 this=this, 2573 expression=expression, 2574 from_expressions=from_expressions, 2575 to_expressions=to_expressions, 2576 ) 2577 2578 # https://www.postgresql.org/docs/current/sql-createtable.html 2579 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2580 if not self._match_text_seq("OF"): 2581 self._retreat(self._index - 1) 2582 return None 2583 2584 this = self._parse_table(schema=True) 2585 2586 if self._match(TokenType.DEFAULT): 2587 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2588 elif self._match_text_seq("FOR", "VALUES"): 2589 expression = self._parse_partition_bound_spec() 2590 else: 2591 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2592 2593 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2594 2595 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2596 self._match(TokenType.EQ) 2597 return self.expression( 2598 exp.PartitionedByProperty, 2599 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2600 ) 2601 2602 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2603 if self._match_text_seq("AND", "STATISTICS"): 2604 statistics = True 2605 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2606 statistics = False 2607 else: 2608 statistics = None 2609 2610 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2611 2612 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2613 if self._match_text_seq("SQL"): 2614 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2615 return None 2616 2617 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2618 if self._match_text_seq("SQL", "DATA"): 2619 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2620 return None 2621 2622 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2623 if self._match_text_seq("PRIMARY", "INDEX"): 2624 return exp.NoPrimaryIndexProperty() 2625 if self._match_text_seq("SQL"): 2626 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2627 return None 2628 2629 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2630 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2631 return exp.OnCommitProperty() 2632 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2633 return exp.OnCommitProperty(delete=True) 2634 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2635 2636 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2637 if self._match_text_seq("SQL", "DATA"): 2638 return self.expression(exp.SqlReadWriteProperty, 
this="READS SQL DATA") 2639 return None 2640 2641 def _parse_distkey(self) -> exp.DistKeyProperty: 2642 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2643 2644 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2645 table = self._parse_table(schema=True) 2646 2647 options = [] 2648 while self._match_texts(("INCLUDING", "EXCLUDING")): 2649 this = self._prev.text.upper() 2650 2651 id_var = self._parse_id_var() 2652 if not id_var: 2653 return None 2654 2655 options.append( 2656 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2657 ) 2658 2659 return self.expression(exp.LikeProperty, this=table, expressions=options) 2660 2661 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2662 return self.expression( 2663 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2664 ) 2665 2666 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2667 self._match(TokenType.EQ) 2668 return self.expression( 2669 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2670 ) 2671 2672 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2673 self._match_text_seq("WITH", "CONNECTION") 2674 return self.expression( 2675 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2676 ) 2677 2678 def _parse_returns(self) -> exp.ReturnsProperty: 2679 value: t.Optional[exp.Expression] 2680 null = None 2681 is_table = self._match(TokenType.TABLE) 2682 2683 if is_table: 2684 if self._match(TokenType.LT): 2685 value = self.expression( 2686 exp.Schema, 2687 this="TABLE", 2688 expressions=self._parse_csv(self._parse_struct_types), 2689 ) 2690 if not self._match(TokenType.GT): 2691 self.raise_error("Expecting >") 2692 else: 2693 value = self._parse_schema(exp.var("TABLE")) 2694 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2695 null = True 2696 value = None 2697 else: 2698 value = self._parse_types() 2699 2700 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2701 2702 def _parse_describe(self) -> exp.Describe: 2703 kind = self._match_set(self.CREATABLES) and self._prev.text 2704 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2705 if self._match(TokenType.DOT): 2706 style = None 2707 self._retreat(self._index - 2) 2708 2709 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2710 2711 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2712 this = self._parse_statement() 2713 else: 2714 this = self._parse_table(schema=True) 2715 2716 properties = self._parse_properties() 2717 expressions = properties.expressions if properties else None 2718 partition = self._parse_partition() 2719 return self.expression( 2720 exp.Describe, 2721 this=this, 2722 style=style, 2723 kind=kind, 2724 expressions=expressions, 2725 partition=partition, 2726 format=format, 2727 ) 2728 2729 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2730 kind = self._prev.text.upper() 2731 expressions = [] 2732 2733 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2734 if self._match(TokenType.WHEN): 2735 expression = self._parse_disjunction() 2736 self._match(TokenType.THEN) 2737 else: 2738 expression = None 2739 2740 else_ = self._match(TokenType.ELSE) 2741 2742 if not self._match(TokenType.INTO): 2743 return None 2744 2745 return self.expression( 2746 
exp.ConditionalInsert, 2747 this=self.expression( 2748 exp.Insert, 2749 this=self._parse_table(schema=True), 2750 expression=self._parse_derived_table_values(), 2751 ), 2752 expression=expression, 2753 else_=else_, 2754 ) 2755 2756 expression = parse_conditional_insert() 2757 while expression is not None: 2758 expressions.append(expression) 2759 expression = parse_conditional_insert() 2760 2761 return self.expression( 2762 exp.MultitableInserts, 2763 kind=kind, 2764 comments=comments, 2765 expressions=expressions, 2766 source=self._parse_table(), 2767 ) 2768 2769 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2770 comments = [] 2771 hint = self._parse_hint() 2772 overwrite = self._match(TokenType.OVERWRITE) 2773 ignore = self._match(TokenType.IGNORE) 2774 local = self._match_text_seq("LOCAL") 2775 alternative = None 2776 is_function = None 2777 2778 if self._match_text_seq("DIRECTORY"): 2779 this: t.Optional[exp.Expression] = self.expression( 2780 exp.Directory, 2781 this=self._parse_var_or_string(), 2782 local=local, 2783 row_format=self._parse_row_format(match_row=True), 2784 ) 2785 else: 2786 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2787 comments += ensure_list(self._prev_comments) 2788 return self._parse_multitable_inserts(comments) 2789 2790 if self._match(TokenType.OR): 2791 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2792 2793 self._match(TokenType.INTO) 2794 comments += ensure_list(self._prev_comments) 2795 self._match(TokenType.TABLE) 2796 is_function = self._match(TokenType.FUNCTION) 2797 2798 this = ( 2799 self._parse_table(schema=True, parse_partition=True) 2800 if not is_function 2801 else self._parse_function() 2802 ) 2803 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2804 this.set("alias", self._parse_table_alias()) 2805 2806 returning = self._parse_returning() 2807 2808 return self.expression( 2809 exp.Insert, 2810 comments=comments, 2811 hint=hint, 2812 is_function=is_function, 2813 this=this, 2814 stored=self._match_text_seq("STORED") and self._parse_stored(), 2815 by_name=self._match_text_seq("BY", "NAME"), 2816 exists=self._parse_exists(), 2817 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2818 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2819 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2820 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2821 conflict=self._parse_on_conflict(), 2822 returning=returning or self._parse_returning(), 2823 overwrite=overwrite, 2824 alternative=alternative, 2825 ignore=ignore, 2826 source=self._match(TokenType.TABLE) and self._parse_table(), 2827 ) 2828 2829 def _parse_kill(self) -> exp.Kill: 2830 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2831 2832 return self.expression( 2833 exp.Kill, 2834 this=self._parse_primary(), 2835 kind=kind, 2836 ) 2837 2838 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2839 conflict = self._match_text_seq("ON", "CONFLICT") 2840 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2841 2842 if not conflict and not duplicate: 2843 return None 2844 2845 conflict_keys = None 2846 constraint = None 2847 2848 if conflict: 2849 if self._match_text_seq("ON", "CONSTRAINT"): 2850 constraint = self._parse_id_var() 2851 elif self._match(TokenType.L_PAREN): 2852 conflict_keys = self._parse_csv(self._parse_id_var) 2853 
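            # e.g. the parenthesized key list in a Postgres-style
            # "INSERT ... ON CONFLICT (id, name) DO UPDATE SET ..."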
self._match_r_paren() 2854 2855 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2856 if self._prev.token_type == TokenType.UPDATE: 2857 self._match(TokenType.SET) 2858 expressions = self._parse_csv(self._parse_equality) 2859 else: 2860 expressions = None 2861 2862 return self.expression( 2863 exp.OnConflict, 2864 duplicate=duplicate, 2865 expressions=expressions, 2866 action=action, 2867 conflict_keys=conflict_keys, 2868 constraint=constraint, 2869 where=self._parse_where(), 2870 ) 2871 2872 def _parse_returning(self) -> t.Optional[exp.Returning]: 2873 if not self._match(TokenType.RETURNING): 2874 return None 2875 return self.expression( 2876 exp.Returning, 2877 expressions=self._parse_csv(self._parse_expression), 2878 into=self._match(TokenType.INTO) and self._parse_table_part(), 2879 ) 2880 2881 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2882 if not self._match(TokenType.FORMAT): 2883 return None 2884 return self._parse_row_format() 2885 2886 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2887 index = self._index 2888 with_ = with_ or self._match_text_seq("WITH") 2889 2890 if not self._match(TokenType.SERDE_PROPERTIES): 2891 self._retreat(index) 2892 return None 2893 return self.expression( 2894 exp.SerdeProperties, 2895 **{ # type: ignore 2896 "expressions": self._parse_wrapped_properties(), 2897 "with": with_, 2898 }, 2899 ) 2900 2901 def _parse_row_format( 2902 self, match_row: bool = False 2903 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2904 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2905 return None 2906 2907 if self._match_text_seq("SERDE"): 2908 this = self._parse_string() 2909 2910 serde_properties = self._parse_serde_properties() 2911 2912 return self.expression( 2913 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2914 ) 2915 2916 self._match_text_seq("DELIMITED") 2917 2918 kwargs = {} 2919 2920 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2921 kwargs["fields"] = self._parse_string() 2922 if self._match_text_seq("ESCAPED", "BY"): 2923 kwargs["escaped"] = self._parse_string() 2924 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2925 kwargs["collection_items"] = self._parse_string() 2926 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2927 kwargs["map_keys"] = self._parse_string() 2928 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2929 kwargs["lines"] = self._parse_string() 2930 if self._match_text_seq("NULL", "DEFINED", "AS"): 2931 kwargs["null"] = self._parse_string() 2932 2933 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2934 2935 def _parse_load(self) -> exp.LoadData | exp.Command: 2936 if self._match_text_seq("DATA"): 2937 local = self._match_text_seq("LOCAL") 2938 self._match_text_seq("INPATH") 2939 inpath = self._parse_string() 2940 overwrite = self._match(TokenType.OVERWRITE) 2941 self._match_pair(TokenType.INTO, TokenType.TABLE) 2942 2943 return self.expression( 2944 exp.LoadData, 2945 this=self._parse_table(schema=True), 2946 local=local, 2947 overwrite=overwrite, 2948 inpath=inpath, 2949 partition=self._parse_partition(), 2950 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2951 serde=self._match_text_seq("SERDE") and self._parse_string(), 2952 ) 2953 return self._parse_as_command(self._prev) 2954 2955 def _parse_delete(self) -> exp.Delete: 2956 # This handles 
MySQL's "Multiple-Table Syntax" 2957 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2958 tables = None 2959 if not self._match(TokenType.FROM, advance=False): 2960 tables = self._parse_csv(self._parse_table) or None 2961 2962 returning = self._parse_returning() 2963 2964 return self.expression( 2965 exp.Delete, 2966 tables=tables, 2967 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2968 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2969 cluster=self._match(TokenType.ON) and self._parse_on_property(), 2970 where=self._parse_where(), 2971 returning=returning or self._parse_returning(), 2972 limit=self._parse_limit(), 2973 ) 2974 2975 def _parse_update(self) -> exp.Update: 2976 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2977 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2978 returning = self._parse_returning() 2979 return self.expression( 2980 exp.Update, 2981 **{ # type: ignore 2982 "this": this, 2983 "expressions": expressions, 2984 "from": self._parse_from(joins=True), 2985 "where": self._parse_where(), 2986 "returning": returning or self._parse_returning(), 2987 "order": self._parse_order(), 2988 "limit": self._parse_limit(), 2989 }, 2990 ) 2991 2992 def _parse_use(self) -> exp.Use: 2993 return self.expression( 2994 exp.Use, 2995 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 2996 this=self._parse_table(schema=False), 2997 ) 2998 2999 def _parse_uncache(self) -> exp.Uncache: 3000 if not self._match(TokenType.TABLE): 3001 self.raise_error("Expecting TABLE after UNCACHE") 3002 3003 return self.expression( 3004 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3005 ) 3006 3007 def _parse_cache(self) -> exp.Cache: 3008 lazy = self._match_text_seq("LAZY") 3009 self._match(TokenType.TABLE) 3010 table = self._parse_table(schema=True) 3011 3012 options = [] 3013 if self._match_text_seq("OPTIONS"): 3014 self._match_l_paren() 3015 k = self._parse_string() 3016 self._match(TokenType.EQ) 3017 v = self._parse_string() 3018 options = [k, v] 3019 self._match_r_paren() 3020 3021 self._match(TokenType.ALIAS) 3022 return self.expression( 3023 exp.Cache, 3024 this=table, 3025 lazy=lazy, 3026 options=options, 3027 expression=self._parse_select(nested=True), 3028 ) 3029 3030 def _parse_partition(self) -> t.Optional[exp.Partition]: 3031 if not self._match_texts(self.PARTITION_KEYWORDS): 3032 return None 3033 3034 return self.expression( 3035 exp.Partition, 3036 subpartition=self._prev.text.upper() == "SUBPARTITION", 3037 expressions=self._parse_wrapped_csv(self._parse_assignment), 3038 ) 3039 3040 def _parse_value(self) -> t.Optional[exp.Tuple]: 3041 def _parse_value_expression() -> t.Optional[exp.Expression]: 3042 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3043 return exp.var(self._prev.text.upper()) 3044 return self._parse_expression() 3045 3046 if self._match(TokenType.L_PAREN): 3047 expressions = self._parse_csv(_parse_value_expression) 3048 self._match_r_paren() 3049 return self.expression(exp.Tuple, expressions=expressions) 3050 3051 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
3052 expression = self._parse_expression() 3053 if expression: 3054 return self.expression(exp.Tuple, expressions=[expression]) 3055 return None 3056 3057 def _parse_projections(self) -> t.List[exp.Expression]: 3058 return self._parse_expressions() 3059 3060 def _parse_select( 3061 self, 3062 nested: bool = False, 3063 table: bool = False, 3064 parse_subquery_alias: bool = True, 3065 parse_set_operation: bool = True, 3066 ) -> t.Optional[exp.Expression]: 3067 cte = self._parse_with() 3068 3069 if cte: 3070 this = self._parse_statement() 3071 3072 if not this: 3073 self.raise_error("Failed to parse any statement following CTE") 3074 return cte 3075 3076 if "with" in this.arg_types: 3077 this.set("with", cte) 3078 else: 3079 self.raise_error(f"{this.key} does not support CTE") 3080 this = cte 3081 3082 return this 3083 3084 # duckdb supports leading with FROM x 3085 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 3086 3087 if self._match(TokenType.SELECT): 3088 comments = self._prev_comments 3089 3090 hint = self._parse_hint() 3091 3092 if self._next and not self._next.token_type == TokenType.DOT: 3093 all_ = self._match(TokenType.ALL) 3094 distinct = self._match_set(self.DISTINCT_TOKENS) 3095 else: 3096 all_, distinct = None, None 3097 3098 kind = ( 3099 self._match(TokenType.ALIAS) 3100 and self._match_texts(("STRUCT", "VALUE")) 3101 and self._prev.text.upper() 3102 ) 3103 3104 if distinct: 3105 distinct = self.expression( 3106 exp.Distinct, 3107 on=self._parse_value() if self._match(TokenType.ON) else None, 3108 ) 3109 3110 if all_ and distinct: 3111 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3112 3113 operation_modifiers = [] 3114 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3115 operation_modifiers.append(exp.var(self._prev.text.upper())) 3116 3117 limit = self._parse_limit(top=True) 3118 projections = self._parse_projections() 3119 3120 this = self.expression( 3121 exp.Select, 3122 kind=kind, 3123 hint=hint, 3124 distinct=distinct, 3125 expressions=projections, 3126 limit=limit, 3127 operation_modifiers=operation_modifiers or None, 3128 ) 3129 this.comments = comments 3130 3131 into = self._parse_into() 3132 if into: 3133 this.set("into", into) 3134 3135 if not from_: 3136 from_ = self._parse_from() 3137 3138 if from_: 3139 this.set("from", from_) 3140 3141 this = self._parse_query_modifiers(this) 3142 elif (table or nested) and self._match(TokenType.L_PAREN): 3143 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3144 this = self._parse_simplified_pivot( 3145 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3146 ) 3147 elif self._match(TokenType.FROM): 3148 from_ = self._parse_from(skip_from_token=True) 3149 # Support parentheses for duckdb FROM-first syntax 3150 select = self._parse_select() 3151 if select: 3152 select.set("from", from_) 3153 this = select 3154 else: 3155 this = exp.select("*").from_(t.cast(exp.From, from_)) 3156 else: 3157 this = ( 3158 self._parse_table() 3159 if table 3160 else self._parse_select(nested=True, parse_set_operation=False) 3161 ) 3162 3163 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3164 # in case a modifier (e.g. 
join) is following 3165 if table and isinstance(this, exp.Values) and this.alias: 3166 alias = this.args["alias"].pop() 3167 this = exp.Table(this=this, alias=alias) 3168 3169 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3170 3171 self._match_r_paren() 3172 3173 # We return early here so that the UNION isn't attached to the subquery by the 3174 # following call to _parse_set_operations, but instead becomes the parent node 3175 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3176 elif self._match(TokenType.VALUES, advance=False): 3177 this = self._parse_derived_table_values() 3178 elif from_: 3179 this = exp.select("*").from_(from_.this, copy=False) 3180 elif self._match(TokenType.SUMMARIZE): 3181 table = self._match(TokenType.TABLE) 3182 this = self._parse_select() or self._parse_string() or self._parse_table() 3183 return self.expression(exp.Summarize, this=this, table=table) 3184 elif self._match(TokenType.DESCRIBE): 3185 this = self._parse_describe() 3186 elif self._match_text_seq("STREAM"): 3187 this = self._parse_function() 3188 if this: 3189 this = self.expression(exp.Stream, this=this) 3190 else: 3191 self._retreat(self._index - 1) 3192 else: 3193 this = None 3194 3195 return self._parse_set_operations(this) if parse_set_operation else this 3196 3197 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3198 self._match_text_seq("SEARCH") 3199 3200 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3201 3202 if not kind: 3203 return None 3204 3205 self._match_text_seq("FIRST", "BY") 3206 3207 return self.expression( 3208 exp.RecursiveWithSearch, 3209 kind=kind, 3210 this=self._parse_id_var(), 3211 expression=self._match_text_seq("SET") and self._parse_id_var(), 3212 using=self._match_text_seq("USING") and self._parse_id_var(), 3213 ) 3214 3215 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3216 if not skip_with_token and not self._match(TokenType.WITH): 3217 return None 3218 3219 comments = self._prev_comments 3220 recursive = self._match(TokenType.RECURSIVE) 3221 3222 last_comments = None 3223 expressions = [] 3224 while True: 3225 cte = self._parse_cte() 3226 if isinstance(cte, exp.CTE): 3227 expressions.append(cte) 3228 if last_comments: 3229 cte.add_comments(last_comments) 3230 3231 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3232 break 3233 else: 3234 self._match(TokenType.WITH) 3235 3236 last_comments = self._prev_comments 3237 3238 return self.expression( 3239 exp.With, 3240 comments=comments, 3241 expressions=expressions, 3242 recursive=recursive, 3243 search=self._parse_recursive_with_search(), 3244 ) 3245 3246 def _parse_cte(self) -> t.Optional[exp.CTE]: 3247 index = self._index 3248 3249 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3250 if not alias or not alias.this: 3251 self.raise_error("Expected CTE to have alias") 3252 3253 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3254 self._retreat(index) 3255 return None 3256 3257 comments = self._prev_comments 3258 3259 if self._match_text_seq("NOT", "MATERIALIZED"): 3260 materialized = False 3261 elif self._match_text_seq("MATERIALIZED"): 3262 materialized = True 3263 else: 3264 materialized = None 3265 3266 cte = self.expression( 3267 exp.CTE, 3268 this=self._parse_wrapped(self._parse_statement), 3269 alias=alias, 3270 materialized=materialized, 3271 comments=comments, 3272 ) 3273 3274 if isinstance(cte.this, exp.Values): 3275 
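            # A bare VALUES body is wrapped in a SELECT so the CTE always holds a query,
            # e.g. WITH t AS (VALUES (1)) becomes
            # WITH t AS (SELECT * FROM (VALUES (1)) AS _values).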
cte.set("this", exp.select("*").from_(exp.alias_(cte.this, "_values", table=True))) 3276 3277 return cte 3278 3279 def _parse_table_alias( 3280 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3281 ) -> t.Optional[exp.TableAlias]: 3282 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3283 # so this section tries to parse the clause version and if it fails, it treats the token 3284 # as an identifier (alias) 3285 if self._can_parse_limit_or_offset(): 3286 return None 3287 3288 any_token = self._match(TokenType.ALIAS) 3289 alias = ( 3290 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3291 or self._parse_string_as_identifier() 3292 ) 3293 3294 index = self._index 3295 if self._match(TokenType.L_PAREN): 3296 columns = self._parse_csv(self._parse_function_parameter) 3297 self._match_r_paren() if columns else self._retreat(index) 3298 else: 3299 columns = None 3300 3301 if not alias and not columns: 3302 return None 3303 3304 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3305 3306 # We bubble up comments from the Identifier to the TableAlias 3307 if isinstance(alias, exp.Identifier): 3308 table_alias.add_comments(alias.pop_comments()) 3309 3310 return table_alias 3311 3312 def _parse_subquery( 3313 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3314 ) -> t.Optional[exp.Subquery]: 3315 if not this: 3316 return None 3317 3318 return self.expression( 3319 exp.Subquery, 3320 this=this, 3321 pivots=self._parse_pivots(), 3322 alias=self._parse_table_alias() if parse_alias else None, 3323 sample=self._parse_table_sample(), 3324 ) 3325 3326 def _implicit_unnests_to_explicit(self, this: E) -> E: 3327 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3328 3329 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3330 for i, join in enumerate(this.args.get("joins") or []): 3331 table = join.this 3332 normalized_table = table.copy() 3333 normalized_table.meta["maybe_column"] = True 3334 normalized_table = _norm(normalized_table, dialect=self.dialect) 3335 3336 if isinstance(table, exp.Table) and not join.args.get("on"): 3337 if normalized_table.parts[0].name in refs: 3338 table_as_column = table.to_column() 3339 unnest = exp.Unnest(expressions=[table_as_column]) 3340 3341 # Table.to_column creates a parent Alias node that we want to convert to 3342 # a TableAlias and attach to the Unnest, so it matches the parser's output 3343 if isinstance(table.args.get("alias"), exp.TableAlias): 3344 table_as_column.replace(table_as_column.this) 3345 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3346 3347 table.replace(unnest) 3348 3349 refs.add(normalized_table.alias_or_name) 3350 3351 return this 3352 3353 def _parse_query_modifiers( 3354 self, this: t.Optional[exp.Expression] 3355 ) -> t.Optional[exp.Expression]: 3356 if isinstance(this, self.MODIFIABLES): 3357 for join in self._parse_joins(): 3358 this.append("joins", join) 3359 for lateral in iter(self._parse_lateral, None): 3360 this.append("laterals", lateral) 3361 3362 while True: 3363 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3364 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3365 key, expression = parser(self) 3366 3367 if expression: 3368 this.set(key, expression) 3369 if key == "limit": 3370 offset = expression.args.pop("offset", None) 3371 3372 if offset: 3373 offset = exp.Offset(expression=offset) 
3374 this.set("offset", offset) 3375 3376 limit_by_expressions = expression.expressions 3377 expression.set("expressions", None) 3378 offset.set("expressions", limit_by_expressions) 3379 continue 3380 break 3381 3382 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3383 this = self._implicit_unnests_to_explicit(this) 3384 3385 return this 3386 3387 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3388 start = self._curr 3389 while self._curr: 3390 self._advance() 3391 3392 end = self._tokens[self._index - 1] 3393 return exp.Hint(expressions=[self._find_sql(start, end)]) 3394 3395 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3396 return self._parse_function_call() 3397 3398 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3399 start_index = self._index 3400 should_fallback_to_string = False 3401 3402 hints = [] 3403 try: 3404 for hint in iter( 3405 lambda: self._parse_csv( 3406 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3407 ), 3408 [], 3409 ): 3410 hints.extend(hint) 3411 except ParseError: 3412 should_fallback_to_string = True 3413 3414 if should_fallback_to_string or self._curr: 3415 self._retreat(start_index) 3416 return self._parse_hint_fallback_to_string() 3417 3418 return self.expression(exp.Hint, expressions=hints) 3419 3420 def _parse_hint(self) -> t.Optional[exp.Hint]: 3421 if self._match(TokenType.HINT) and self._prev_comments: 3422 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3423 3424 return None 3425 3426 def _parse_into(self) -> t.Optional[exp.Into]: 3427 if not self._match(TokenType.INTO): 3428 return None 3429 3430 temp = self._match(TokenType.TEMPORARY) 3431 unlogged = self._match_text_seq("UNLOGGED") 3432 self._match(TokenType.TABLE) 3433 3434 return self.expression( 3435 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3436 ) 3437 3438 def _parse_from( 3439 self, joins: bool = False, skip_from_token: bool = False 3440 ) -> t.Optional[exp.From]: 3441 if not skip_from_token and not self._match(TokenType.FROM): 3442 return None 3443 3444 return self.expression( 3445 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3446 ) 3447 3448 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3449 return self.expression( 3450 exp.MatchRecognizeMeasure, 3451 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3452 this=self._parse_expression(), 3453 ) 3454 3455 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3456 if not self._match(TokenType.MATCH_RECOGNIZE): 3457 return None 3458 3459 self._match_l_paren() 3460 3461 partition = self._parse_partition_by() 3462 order = self._parse_order() 3463 3464 measures = ( 3465 self._parse_csv(self._parse_match_recognize_measure) 3466 if self._match_text_seq("MEASURES") 3467 else None 3468 ) 3469 3470 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3471 rows = exp.var("ONE ROW PER MATCH") 3472 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3473 text = "ALL ROWS PER MATCH" 3474 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3475 text += " SHOW EMPTY MATCHES" 3476 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3477 text += " OMIT EMPTY MATCHES" 3478 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3479 text += " WITH UNMATCHED ROWS" 3480 rows = exp.var(text) 3481 else: 3482 rows = None 3483 3484 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3485 text = 
"AFTER MATCH SKIP" 3486 if self._match_text_seq("PAST", "LAST", "ROW"): 3487 text += " PAST LAST ROW" 3488 elif self._match_text_seq("TO", "NEXT", "ROW"): 3489 text += " TO NEXT ROW" 3490 elif self._match_text_seq("TO", "FIRST"): 3491 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3492 elif self._match_text_seq("TO", "LAST"): 3493 text += f" TO LAST {self._advance_any().text}" # type: ignore 3494 after = exp.var(text) 3495 else: 3496 after = None 3497 3498 if self._match_text_seq("PATTERN"): 3499 self._match_l_paren() 3500 3501 if not self._curr: 3502 self.raise_error("Expecting )", self._curr) 3503 3504 paren = 1 3505 start = self._curr 3506 3507 while self._curr and paren > 0: 3508 if self._curr.token_type == TokenType.L_PAREN: 3509 paren += 1 3510 if self._curr.token_type == TokenType.R_PAREN: 3511 paren -= 1 3512 3513 end = self._prev 3514 self._advance() 3515 3516 if paren > 0: 3517 self.raise_error("Expecting )", self._curr) 3518 3519 pattern = exp.var(self._find_sql(start, end)) 3520 else: 3521 pattern = None 3522 3523 define = ( 3524 self._parse_csv(self._parse_name_as_expression) 3525 if self._match_text_seq("DEFINE") 3526 else None 3527 ) 3528 3529 self._match_r_paren() 3530 3531 return self.expression( 3532 exp.MatchRecognize, 3533 partition_by=partition, 3534 order=order, 3535 measures=measures, 3536 rows=rows, 3537 after=after, 3538 pattern=pattern, 3539 define=define, 3540 alias=self._parse_table_alias(), 3541 ) 3542 3543 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3544 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3545 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3546 cross_apply = False 3547 3548 if cross_apply is not None: 3549 this = self._parse_select(table=True) 3550 view = None 3551 outer = None 3552 elif self._match(TokenType.LATERAL): 3553 this = self._parse_select(table=True) 3554 view = self._match(TokenType.VIEW) 3555 outer = self._match(TokenType.OUTER) 3556 else: 3557 return None 3558 3559 if not this: 3560 this = ( 3561 self._parse_unnest() 3562 or self._parse_function() 3563 or self._parse_id_var(any_token=False) 3564 ) 3565 3566 while self._match(TokenType.DOT): 3567 this = exp.Dot( 3568 this=this, 3569 expression=self._parse_function() or self._parse_id_var(any_token=False), 3570 ) 3571 3572 if view: 3573 table = self._parse_id_var(any_token=False) 3574 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3575 table_alias: t.Optional[exp.TableAlias] = self.expression( 3576 exp.TableAlias, this=table, columns=columns 3577 ) 3578 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3579 # We move the alias from the lateral's child node to the lateral itself 3580 table_alias = this.args["alias"].pop() 3581 else: 3582 table_alias = self._parse_table_alias() 3583 3584 return self.expression( 3585 exp.Lateral, 3586 this=this, 3587 view=view, 3588 outer=outer, 3589 alias=table_alias, 3590 cross_apply=cross_apply, 3591 ) 3592 3593 def _parse_join_parts( 3594 self, 3595 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3596 return ( 3597 self._match_set(self.JOIN_METHODS) and self._prev, 3598 self._match_set(self.JOIN_SIDES) and self._prev, 3599 self._match_set(self.JOIN_KINDS) and self._prev, 3600 ) 3601 3602 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3603 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3604 this = self._parse_column() 3605 if isinstance(this, exp.Column): 3606 return 
this.this 3607 return this 3608 3609 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3610 3611 def _parse_join( 3612 self, skip_join_token: bool = False, parse_bracket: bool = False 3613 ) -> t.Optional[exp.Join]: 3614 if self._match(TokenType.COMMA): 3615 table = self._try_parse(self._parse_table) 3616 if table: 3617 return self.expression(exp.Join, this=table) 3618 return None 3619 3620 index = self._index 3621 method, side, kind = self._parse_join_parts() 3622 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3623 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3624 3625 if not skip_join_token and not join: 3626 self._retreat(index) 3627 kind = None 3628 method = None 3629 side = None 3630 3631 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3632 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3633 3634 if not skip_join_token and not join and not outer_apply and not cross_apply: 3635 return None 3636 3637 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3638 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3639 kwargs["expressions"] = self._parse_csv( 3640 lambda: self._parse_table(parse_bracket=parse_bracket) 3641 ) 3642 3643 if method: 3644 kwargs["method"] = method.text 3645 if side: 3646 kwargs["side"] = side.text 3647 if kind: 3648 kwargs["kind"] = kind.text 3649 if hint: 3650 kwargs["hint"] = hint 3651 3652 if self._match(TokenType.MATCH_CONDITION): 3653 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3654 3655 if self._match(TokenType.ON): 3656 kwargs["on"] = self._parse_assignment() 3657 elif self._match(TokenType.USING): 3658 kwargs["using"] = self._parse_using_identifiers() 3659 elif ( 3660 not (outer_apply or cross_apply) 3661 and not isinstance(kwargs["this"], exp.Unnest) 3662 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3663 ): 3664 index = self._index 3665 joins: t.Optional[list] = list(self._parse_joins()) 3666 3667 if joins and self._match(TokenType.ON): 3668 kwargs["on"] = self._parse_assignment() 3669 elif joins and self._match(TokenType.USING): 3670 kwargs["using"] = self._parse_using_identifiers() 3671 else: 3672 joins = None 3673 self._retreat(index) 3674 3675 kwargs["this"].set("joins", joins if joins else None) 3676 3677 comments = [c for token in (method, side, kind) if token for c in token.comments] 3678 return self.expression(exp.Join, comments=comments, **kwargs) 3679 3680 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3681 this = self._parse_assignment() 3682 3683 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3684 return this 3685 3686 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3687 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3688 3689 return this 3690 3691 def _parse_index_params(self) -> exp.IndexParameters: 3692 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3693 3694 if self._match(TokenType.L_PAREN, advance=False): 3695 columns = self._parse_wrapped_csv(self._parse_with_operator) 3696 else: 3697 columns = None 3698 3699 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3700 partition_by = self._parse_partition_by() 3701 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3702 tablespace = ( 3703 
self._parse_var(any_token=True) 3704 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3705 else None 3706 ) 3707 where = self._parse_where() 3708 3709 on = self._parse_field() if self._match(TokenType.ON) else None 3710 3711 return self.expression( 3712 exp.IndexParameters, 3713 using=using, 3714 columns=columns, 3715 include=include, 3716 partition_by=partition_by, 3717 where=where, 3718 with_storage=with_storage, 3719 tablespace=tablespace, 3720 on=on, 3721 ) 3722 3723 def _parse_index( 3724 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3725 ) -> t.Optional[exp.Index]: 3726 if index or anonymous: 3727 unique = None 3728 primary = None 3729 amp = None 3730 3731 self._match(TokenType.ON) 3732 self._match(TokenType.TABLE) # hive 3733 table = self._parse_table_parts(schema=True) 3734 else: 3735 unique = self._match(TokenType.UNIQUE) 3736 primary = self._match_text_seq("PRIMARY") 3737 amp = self._match_text_seq("AMP") 3738 3739 if not self._match(TokenType.INDEX): 3740 return None 3741 3742 index = self._parse_id_var() 3743 table = None 3744 3745 params = self._parse_index_params() 3746 3747 return self.expression( 3748 exp.Index, 3749 this=index, 3750 table=table, 3751 unique=unique, 3752 primary=primary, 3753 amp=amp, 3754 params=params, 3755 ) 3756 3757 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3758 hints: t.List[exp.Expression] = [] 3759 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3760 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3761 hints.append( 3762 self.expression( 3763 exp.WithTableHint, 3764 expressions=self._parse_csv( 3765 lambda: self._parse_function() or self._parse_var(any_token=True) 3766 ), 3767 ) 3768 ) 3769 self._match_r_paren() 3770 else: 3771 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3772 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3773 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3774 3775 self._match_set((TokenType.INDEX, TokenType.KEY)) 3776 if self._match(TokenType.FOR): 3777 hint.set("target", self._advance_any() and self._prev.text.upper()) 3778 3779 hint.set("expressions", self._parse_wrapped_id_vars()) 3780 hints.append(hint) 3781 3782 return hints or None 3783 3784 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3785 return ( 3786 (not schema and self._parse_function(optional_parens=False)) 3787 or self._parse_id_var(any_token=False) 3788 or self._parse_string_as_identifier() 3789 or self._parse_placeholder() 3790 ) 3791 3792 def _parse_table_parts( 3793 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3794 ) -> exp.Table: 3795 catalog = None 3796 db = None 3797 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3798 3799 while self._match(TokenType.DOT): 3800 if catalog: 3801 # This allows nesting the table in arbitrarily many dot expressions if needed 3802 table = self.expression( 3803 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3804 ) 3805 else: 3806 catalog = db 3807 db = table 3808 # "" used for tsql FROM a..b case 3809 table = self._parse_table_part(schema=schema) or "" 3810 3811 if ( 3812 wildcard 3813 and self._is_connected() 3814 and (isinstance(table, exp.Identifier) or not table) 3815 and self._match(TokenType.STAR) 3816 ): 3817 if isinstance(table, exp.Identifier): 3818 table.args["this"] += "*" 3819 else: 3820 table = exp.Identifier(this="*") 3821 3822 # We bubble up 
comments from the Identifier to the Table 3823 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3824 3825 if is_db_reference: 3826 catalog = db 3827 db = table 3828 table = None 3829 3830 if not table and not is_db_reference: 3831 self.raise_error(f"Expected table name but got {self._curr}") 3832 if not db and is_db_reference: 3833 self.raise_error(f"Expected database name but got {self._curr}") 3834 3835 table = self.expression( 3836 exp.Table, 3837 comments=comments, 3838 this=table, 3839 db=db, 3840 catalog=catalog, 3841 ) 3842 3843 changes = self._parse_changes() 3844 if changes: 3845 table.set("changes", changes) 3846 3847 at_before = self._parse_historical_data() 3848 if at_before: 3849 table.set("when", at_before) 3850 3851 pivots = self._parse_pivots() 3852 if pivots: 3853 table.set("pivots", pivots) 3854 3855 return table 3856 3857 def _parse_table( 3858 self, 3859 schema: bool = False, 3860 joins: bool = False, 3861 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3862 parse_bracket: bool = False, 3863 is_db_reference: bool = False, 3864 parse_partition: bool = False, 3865 ) -> t.Optional[exp.Expression]: 3866 lateral = self._parse_lateral() 3867 if lateral: 3868 return lateral 3869 3870 unnest = self._parse_unnest() 3871 if unnest: 3872 return unnest 3873 3874 values = self._parse_derived_table_values() 3875 if values: 3876 return values 3877 3878 subquery = self._parse_select(table=True) 3879 if subquery: 3880 if not subquery.args.get("pivots"): 3881 subquery.set("pivots", self._parse_pivots()) 3882 return subquery 3883 3884 bracket = parse_bracket and self._parse_bracket(None) 3885 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3886 3887 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3888 self._parse_table 3889 ) 3890 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3891 3892 only = self._match(TokenType.ONLY) 3893 3894 this = t.cast( 3895 exp.Expression, 3896 bracket 3897 or rows_from 3898 or self._parse_bracket( 3899 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3900 ), 3901 ) 3902 3903 if only: 3904 this.set("only", only) 3905 3906 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3907 self._match_text_seq("*") 3908 3909 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3910 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3911 this.set("partition", self._parse_partition()) 3912 3913 if schema: 3914 return self._parse_schema(this=this) 3915 3916 version = self._parse_version() 3917 3918 if version: 3919 this.set("version", version) 3920 3921 if self.dialect.ALIAS_POST_TABLESAMPLE: 3922 this.set("sample", self._parse_table_sample()) 3923 3924 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3925 if alias: 3926 this.set("alias", alias) 3927 3928 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3929 return self.expression( 3930 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3931 ) 3932 3933 this.set("hints", self._parse_table_hints()) 3934 3935 if not this.args.get("pivots"): 3936 this.set("pivots", self._parse_pivots()) 3937 3938 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3939 this.set("sample", self._parse_table_sample()) 3940 3941 if joins: 3942 for join in self._parse_joins(): 3943 this.append("joins", join) 3944 3945 if self._match_pair(TokenType.WITH, 
TokenType.ORDINALITY): 3946 this.set("ordinality", True) 3947 this.set("alias", self._parse_table_alias()) 3948 3949 return this 3950 3951 def _parse_version(self) -> t.Optional[exp.Version]: 3952 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3953 this = "TIMESTAMP" 3954 elif self._match(TokenType.VERSION_SNAPSHOT): 3955 this = "VERSION" 3956 else: 3957 return None 3958 3959 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3960 kind = self._prev.text.upper() 3961 start = self._parse_bitwise() 3962 self._match_texts(("TO", "AND")) 3963 end = self._parse_bitwise() 3964 expression: t.Optional[exp.Expression] = self.expression( 3965 exp.Tuple, expressions=[start, end] 3966 ) 3967 elif self._match_text_seq("CONTAINED", "IN"): 3968 kind = "CONTAINED IN" 3969 expression = self.expression( 3970 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3971 ) 3972 elif self._match(TokenType.ALL): 3973 kind = "ALL" 3974 expression = None 3975 else: 3976 self._match_text_seq("AS", "OF") 3977 kind = "AS OF" 3978 expression = self._parse_type() 3979 3980 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3981 3982 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3983 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3984 index = self._index 3985 historical_data = None 3986 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3987 this = self._prev.text.upper() 3988 kind = ( 3989 self._match(TokenType.L_PAREN) 3990 and self._match_texts(self.HISTORICAL_DATA_KIND) 3991 and self._prev.text.upper() 3992 ) 3993 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3994 3995 if expression: 3996 self._match_r_paren() 3997 historical_data = self.expression( 3998 exp.HistoricalData, this=this, kind=kind, expression=expression 3999 ) 4000 else: 4001 self._retreat(index) 4002 4003 return historical_data 4004 4005 def _parse_changes(self) -> t.Optional[exp.Changes]: 4006 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4007 return None 4008 4009 information = self._parse_var(any_token=True) 4010 self._match_r_paren() 4011 4012 return self.expression( 4013 exp.Changes, 4014 information=information, 4015 at_before=self._parse_historical_data(), 4016 end=self._parse_historical_data(), 4017 ) 4018 4019 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4020 if not self._match(TokenType.UNNEST): 4021 return None 4022 4023 expressions = self._parse_wrapped_csv(self._parse_equality) 4024 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4025 4026 alias = self._parse_table_alias() if with_alias else None 4027 4028 if alias: 4029 if self.dialect.UNNEST_COLUMN_ONLY: 4030 if alias.args.get("columns"): 4031 self.raise_error("Unexpected extra column alias in unnest.") 4032 4033 alias.set("columns", [alias.this]) 4034 alias.set("this", None) 4035 4036 columns = alias.args.get("columns") or [] 4037 if offset and len(expressions) < len(columns): 4038 offset = columns.pop() 4039 4040 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4041 self._match(TokenType.ALIAS) 4042 offset = self._parse_id_var( 4043 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4044 ) or exp.to_identifier("offset") 4045 4046 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4047 4048 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4049 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4050 if not is_derived 
and not ( 4051 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4052 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4053 ): 4054 return None 4055 4056 expressions = self._parse_csv(self._parse_value) 4057 alias = self._parse_table_alias() 4058 4059 if is_derived: 4060 self._match_r_paren() 4061 4062 return self.expression( 4063 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4064 ) 4065 4066 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4067 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4068 as_modifier and self._match_text_seq("USING", "SAMPLE") 4069 ): 4070 return None 4071 4072 bucket_numerator = None 4073 bucket_denominator = None 4074 bucket_field = None 4075 percent = None 4076 size = None 4077 seed = None 4078 4079 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4080 matched_l_paren = self._match(TokenType.L_PAREN) 4081 4082 if self.TABLESAMPLE_CSV: 4083 num = None 4084 expressions = self._parse_csv(self._parse_primary) 4085 else: 4086 expressions = None 4087 num = ( 4088 self._parse_factor() 4089 if self._match(TokenType.NUMBER, advance=False) 4090 else self._parse_primary() or self._parse_placeholder() 4091 ) 4092 4093 if self._match_text_seq("BUCKET"): 4094 bucket_numerator = self._parse_number() 4095 self._match_text_seq("OUT", "OF") 4096 bucket_denominator = self._parse_number() 4097 self._match(TokenType.ON) 4098 bucket_field = self._parse_field() 4099 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4100 percent = num 4101 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4102 size = num 4103 else: 4104 percent = num 4105 4106 if matched_l_paren: 4107 self._match_r_paren() 4108 4109 if self._match(TokenType.L_PAREN): 4110 method = self._parse_var(upper=True) 4111 seed = self._match(TokenType.COMMA) and self._parse_number() 4112 self._match_r_paren() 4113 elif self._match_texts(("SEED", "REPEATABLE")): 4114 seed = self._parse_wrapped(self._parse_number) 4115 4116 if not method and self.DEFAULT_SAMPLING_METHOD: 4117 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4118 4119 return self.expression( 4120 exp.TableSample, 4121 expressions=expressions, 4122 method=method, 4123 bucket_numerator=bucket_numerator, 4124 bucket_denominator=bucket_denominator, 4125 bucket_field=bucket_field, 4126 percent=percent, 4127 size=size, 4128 seed=seed, 4129 ) 4130 4131 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4132 return list(iter(self._parse_pivot, None)) or None 4133 4134 def _parse_joins(self) -> t.Iterator[exp.Join]: 4135 return iter(self._parse_join, None) 4136 4137 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4138 if not self._match(TokenType.INTO): 4139 return None 4140 4141 return self.expression( 4142 exp.UnpivotColumns, 4143 this=self._match_text_seq("NAME") and self._parse_column(), 4144 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4145 ) 4146 4147 # https://duckdb.org/docs/sql/statements/pivot 4148 def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4149 def _parse_on() -> t.Optional[exp.Expression]: 4150 this = self._parse_bitwise() 4151 4152 if self._match(TokenType.IN): 4153 # PIVOT ... ON col IN (row_val1, row_val2) 4154 return self._parse_in(this) 4155 if self._match(TokenType.ALIAS, advance=False): 4156 # UNPIVOT ... 
ON (col1, col2, col3) AS row_val 4157 return self._parse_alias(this) 4158 4159 return this 4160 4161 this = self._parse_table() 4162 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4163 into = self._parse_unpivot_columns() 4164 using = self._match(TokenType.USING) and self._parse_csv( 4165 lambda: self._parse_alias(self._parse_function()) 4166 ) 4167 group = self._parse_group() 4168 4169 return self.expression( 4170 exp.Pivot, 4171 this=this, 4172 expressions=expressions, 4173 using=using, 4174 group=group, 4175 unpivot=is_unpivot, 4176 into=into, 4177 ) 4178 4179 def _parse_pivot_in(self) -> exp.In | exp.PivotAny: 4180 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4181 this = self._parse_select_or_expression() 4182 4183 self._match(TokenType.ALIAS) 4184 alias = self._parse_bitwise() 4185 if alias: 4186 if isinstance(alias, exp.Column) and not alias.db: 4187 alias = alias.this 4188 return self.expression(exp.PivotAlias, this=this, alias=alias) 4189 4190 return this 4191 4192 value = self._parse_column() 4193 4194 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4195 self.raise_error("Expecting IN (") 4196 4197 if self._match(TokenType.ANY): 4198 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4199 else: 4200 exprs = self._parse_csv(_parse_aliased_expression) 4201 4202 self._match_r_paren() 4203 return self.expression(exp.In, this=value, expressions=exprs) 4204 4205 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4206 index = self._index 4207 include_nulls = None 4208 4209 if self._match(TokenType.PIVOT): 4210 unpivot = False 4211 elif self._match(TokenType.UNPIVOT): 4212 unpivot = True 4213 4214 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4215 if self._match_text_seq("INCLUDE", "NULLS"): 4216 include_nulls = True 4217 elif self._match_text_seq("EXCLUDE", "NULLS"): 4218 include_nulls = False 4219 else: 4220 return None 4221 4222 expressions = [] 4223 4224 if not self._match(TokenType.L_PAREN): 4225 self._retreat(index) 4226 return None 4227 4228 if unpivot: 4229 expressions = self._parse_csv(self._parse_column) 4230 else: 4231 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 4232 4233 if not expressions: 4234 self.raise_error("Failed to parse PIVOT's aggregation list") 4235 4236 if not self._match(TokenType.FOR): 4237 self.raise_error("Expecting FOR") 4238 4239 field = self._parse_pivot_in() 4240 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4241 self._parse_bitwise 4242 ) 4243 4244 self._match_r_paren() 4245 4246 pivot = self.expression( 4247 exp.Pivot, 4248 expressions=expressions, 4249 field=field, 4250 unpivot=unpivot, 4251 include_nulls=include_nulls, 4252 default_on_null=default_on_null, 4253 ) 4254 4255 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4256 pivot.set("alias", self._parse_table_alias()) 4257 4258 if not unpivot: 4259 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4260 4261 columns: t.List[exp.Expression] = [] 4262 pivot_field_expressions = pivot.args["field"].expressions 4263 4264 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 
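# As a concrete illustration of the naming loop below (a sketch using the public
# sqlglot API; the dialect choice and the exact flag settings are assumptions): for
# PIVOT (SUM(v) AS s FOR k IN ('a', 'b')), names == ["s"] and the field names are
# "a" and "b", so the generated columns are "s_a", "s_b" when PREFIXED_PIVOT_COLUMNS
# is set, and "a_s", "b_s" otherwise:
#
#     import sqlglot
#
#     sql = "SELECT * FROM t PIVOT (SUM(v) AS s FOR k IN ('a', 'b'))"
#     table = sqlglot.parse_one(sql, read="snowflake").args["from"].this
#     pivot = table.args["pivots"][0]
#     print([col.name for col in pivot.args.get("columns", [])])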
4265 if not isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4266 for fld in pivot_field_expressions: 4267 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4268 for name in names: 4269 if self.PREFIXED_PIVOT_COLUMNS: 4270 name = f"{name}_{field_name}" if name else field_name 4271 else: 4272 name = f"{field_name}_{name}" if name else field_name 4273 4274 columns.append(exp.to_identifier(name)) 4275 4276 pivot.set("columns", columns) 4277 4278 return pivot 4279 4280 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4281 return [agg.alias for agg in aggregations] 4282 4283 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4284 if not skip_where_token and not self._match(TokenType.PREWHERE): 4285 return None 4286 4287 return self.expression( 4288 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4289 ) 4290 4291 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4292 if not skip_where_token and not self._match(TokenType.WHERE): 4293 return None 4294 4295 return self.expression( 4296 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4297 ) 4298 4299 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4300 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4301 return None 4302 4303 elements: t.Dict[str, t.Any] = defaultdict(list) 4304 4305 if self._match(TokenType.ALL): 4306 elements["all"] = True 4307 elif self._match(TokenType.DISTINCT): 4308 elements["all"] = False 4309 4310 while True: 4311 index = self._index 4312 4313 elements["expressions"].extend( 4314 self._parse_csv( 4315 lambda: None 4316 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4317 else self._parse_assignment() 4318 ) 4319 ) 4320 4321 before_with_index = self._index 4322 with_prefix = self._match(TokenType.WITH) 4323 4324 if self._match(TokenType.ROLLUP): 4325 elements["rollup"].append( 4326 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4327 ) 4328 elif self._match(TokenType.CUBE): 4329 elements["cube"].append( 4330 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4331 ) 4332 elif self._match(TokenType.GROUPING_SETS): 4333 elements["grouping_sets"].append( 4334 self.expression( 4335 exp.GroupingSets, 4336 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4337 ) 4338 ) 4339 elif self._match_text_seq("TOTALS"): 4340 elements["totals"] = True # type: ignore 4341 4342 if before_with_index <= self._index <= before_with_index + 1: 4343 self._retreat(before_with_index) 4344 break 4345 4346 if index == self._index: 4347 break 4348 4349 return self.expression(exp.Group, **elements) # type: ignore 4350 4351 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4352 return self.expression( 4353 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4354 ) 4355 4356 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4357 if self._match(TokenType.L_PAREN): 4358 grouping_set = self._parse_csv(self._parse_column) 4359 self._match_r_paren() 4360 return self.expression(exp.Tuple, expressions=grouping_set) 4361 4362 return self._parse_column() 4363 4364 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4365 if not skip_having_token and not self._match(TokenType.HAVING): 4366 return None 4367 return self.expression(exp.Having, 
this=self._parse_assignment()) 4368 4369 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4370 if not self._match(TokenType.QUALIFY): 4371 return None 4372 return self.expression(exp.Qualify, this=self._parse_assignment()) 4373 4374 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4375 if skip_start_token: 4376 start = None 4377 elif self._match(TokenType.START_WITH): 4378 start = self._parse_assignment() 4379 else: 4380 return None 4381 4382 self._match(TokenType.CONNECT_BY) 4383 nocycle = self._match_text_seq("NOCYCLE") 4384 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4385 exp.Prior, this=self._parse_bitwise() 4386 ) 4387 connect = self._parse_assignment() 4388 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4389 4390 if not start and self._match(TokenType.START_WITH): 4391 start = self._parse_assignment() 4392 4393 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4394 4395 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4396 this = self._parse_id_var(any_token=True) 4397 if self._match(TokenType.ALIAS): 4398 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4399 return this 4400 4401 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4402 if self._match_text_seq("INTERPOLATE"): 4403 return self._parse_wrapped_csv(self._parse_name_as_expression) 4404 return None 4405 4406 def _parse_order( 4407 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4408 ) -> t.Optional[exp.Expression]: 4409 siblings = None 4410 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4411 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4412 return this 4413 4414 siblings = True 4415 4416 return self.expression( 4417 exp.Order, 4418 this=this, 4419 expressions=self._parse_csv(self._parse_ordered), 4420 siblings=siblings, 4421 ) 4422 4423 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4424 if not self._match(token): 4425 return None 4426 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4427 4428 def _parse_ordered( 4429 self, parse_method: t.Optional[t.Callable] = None 4430 ) -> t.Optional[exp.Ordered]: 4431 this = parse_method() if parse_method else self._parse_assignment() 4432 if not this: 4433 return None 4434 4435 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4436 this = exp.var("ALL") 4437 4438 asc = self._match(TokenType.ASC) 4439 desc = self._match(TokenType.DESC) or (asc and False) 4440 4441 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4442 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4443 4444 nulls_first = is_nulls_first or False 4445 explicitly_null_ordered = is_nulls_first or is_nulls_last 4446 4447 if ( 4448 not explicitly_null_ordered 4449 and ( 4450 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4451 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4452 ) 4453 and self.dialect.NULL_ORDERING != "nulls_are_last" 4454 ): 4455 nulls_first = True 4456 4457 if self._match_text_seq("WITH", "FILL"): 4458 with_fill = self.expression( 4459 exp.WithFill, 4460 **{ # type: ignore 4461 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4462 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4463 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4464 "interpolate": self._parse_interpolate(), 4465 }, 4466 ) 4467 else: 4468 with_fill = None 4469 
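# The nulls_first default computed above can be summarized by this standalone sketch
# (equivalent to the condition a few lines up; not used by the parser itself):
#
#     def nulls_first_by_default(desc: bool, null_ordering: str) -> bool:
#         # NULLs always sort last under "nulls_are_last", regardless of direction
#         if null_ordering == "nulls_are_last":
#             return False
#         # "nulls_are_small" puts NULLs first when ascending; any other setting
#         # (e.g. "nulls_are_large") puts them first only when descending
#         return (not desc and null_ordering == "nulls_are_small") or (
#             desc and null_ordering != "nulls_are_small"
#         )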
4470 return self.expression( 4471 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4472 ) 4473 4474 def _parse_limit_options(self) -> exp.LimitOptions: 4475 percent = self._match(TokenType.PERCENT) 4476 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4477 self._match_text_seq("ONLY") 4478 with_ties = self._match_text_seq("WITH", "TIES") 4479 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4480 4481 def _parse_limit( 4482 self, 4483 this: t.Optional[exp.Expression] = None, 4484 top: bool = False, 4485 skip_limit_token: bool = False, 4486 ) -> t.Optional[exp.Expression]: 4487 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4488 comments = self._prev_comments 4489 if top: 4490 limit_paren = self._match(TokenType.L_PAREN) 4491 expression = self._parse_term() if limit_paren else self._parse_number() 4492 4493 if limit_paren: 4494 self._match_r_paren() 4495 4496 limit_options = self._parse_limit_options() 4497 else: 4498 limit_options = None 4499 expression = self._parse_term() 4500 4501 if self._match(TokenType.COMMA): 4502 offset = expression 4503 expression = self._parse_term() 4504 else: 4505 offset = None 4506 4507 limit_exp = self.expression( 4508 exp.Limit, 4509 this=this, 4510 expression=expression, 4511 offset=offset, 4512 comments=comments, 4513 limit_options=limit_options, 4514 expressions=self._parse_limit_by(), 4515 ) 4516 4517 return limit_exp 4518 4519 if self._match(TokenType.FETCH): 4520 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4521 direction = self._prev.text.upper() if direction else "FIRST" 4522 4523 count = self._parse_field(tokens=self.FETCH_TOKENS) 4524 4525 return self.expression( 4526 exp.Fetch, 4527 direction=direction, 4528 count=count, 4529 limit_options=self._parse_limit_options(), 4530 ) 4531 4532 return this 4533 4534 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4535 if not self._match(TokenType.OFFSET): 4536 return this 4537 4538 count = self._parse_term() 4539 self._match_set((TokenType.ROW, TokenType.ROWS)) 4540 4541 return self.expression( 4542 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4543 ) 4544 4545 def _can_parse_limit_or_offset(self) -> bool: 4546 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4547 return False 4548 4549 index = self._index 4550 result = bool( 4551 self._try_parse(self._parse_limit, retreat=True) 4552 or self._try_parse(self._parse_offset, retreat=True) 4553 ) 4554 self._retreat(index) 4555 return result 4556 4557 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4558 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4559 4560 def _parse_locks(self) -> t.List[exp.Lock]: 4561 locks = [] 4562 while True: 4563 if self._match_text_seq("FOR", "UPDATE"): 4564 update = True 4565 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4566 "LOCK", "IN", "SHARE", "MODE" 4567 ): 4568 update = False 4569 else: 4570 break 4571 4572 expressions = None 4573 if self._match_text_seq("OF"): 4574 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4575 4576 wait: t.Optional[bool | exp.Expression] = None 4577 if self._match_text_seq("NOWAIT"): 4578 wait = True 4579 elif self._match_text_seq("WAIT"): 4580 wait = self._parse_primary() 4581 elif self._match_text_seq("SKIP", "LOCKED"): 4582 wait = False 4583 4584 locks.append( 4585 self.expression(exp.Lock, 
update=update, expressions=expressions, wait=wait) 4586 ) 4587 4588 return locks 4589 4590 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4591 while this and self._match_set(self.SET_OPERATIONS): 4592 token_type = self._prev.token_type 4593 4594 if token_type == TokenType.UNION: 4595 operation: t.Type[exp.SetOperation] = exp.Union 4596 elif token_type == TokenType.EXCEPT: 4597 operation = exp.Except 4598 else: 4599 operation = exp.Intersect 4600 4601 comments = self._prev.comments 4602 4603 if self._match(TokenType.DISTINCT): 4604 distinct: t.Optional[bool] = True 4605 elif self._match(TokenType.ALL): 4606 distinct = False 4607 else: 4608 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4609 if distinct is None: 4610 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4611 4612 by_name = self._match_text_seq("BY", "NAME") 4613 expression = self._parse_select(nested=True, parse_set_operation=False) 4614 4615 this = self.expression( 4616 operation, 4617 comments=comments, 4618 this=this, 4619 distinct=distinct, 4620 by_name=by_name, 4621 expression=expression, 4622 ) 4623 4624 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4625 expression = this.expression 4626 4627 if expression: 4628 for arg in self.SET_OP_MODIFIERS: 4629 expr = expression.args.get(arg) 4630 if expr: 4631 this.set(arg, expr.pop()) 4632 4633 return this 4634 4635 def _parse_expression(self) -> t.Optional[exp.Expression]: 4636 return self._parse_alias(self._parse_assignment()) 4637 4638 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4639 this = self._parse_disjunction() 4640 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4641 # This allows us to parse <non-identifier token> := <expr> 4642 this = exp.column( 4643 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4644 ) 4645 4646 while self._match_set(self.ASSIGNMENT): 4647 if isinstance(this, exp.Column) and len(this.parts) == 1: 4648 this = this.this 4649 4650 this = self.expression( 4651 self.ASSIGNMENT[self._prev.token_type], 4652 this=this, 4653 comments=self._prev_comments, 4654 expression=self._parse_assignment(), 4655 ) 4656 4657 return this 4658 4659 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4660 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4661 4662 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4663 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4664 4665 def _parse_equality(self) -> t.Optional[exp.Expression]: 4666 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4667 4668 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4669 return self._parse_tokens(self._parse_range, self.COMPARISON) 4670 4671 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4672 this = this or self._parse_bitwise() 4673 negate = self._match(TokenType.NOT) 4674 4675 if self._match_set(self.RANGE_PARSERS): 4676 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4677 if not expression: 4678 return this 4679 4680 this = expression 4681 elif self._match(TokenType.ISNULL): 4682 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4683 4684 # Postgres supports ISNULL and NOTNULL for conditions. 
4685 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4686 if self._match(TokenType.NOTNULL): 4687 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4688 this = self.expression(exp.Not, this=this) 4689 4690 if negate: 4691 this = self._negate_range(this) 4692 4693 if self._match(TokenType.IS): 4694 this = self._parse_is(this) 4695 4696 return this 4697 4698 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4699 if not this: 4700 return this 4701 4702 return self.expression(exp.Not, this=this) 4703 4704 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4705 index = self._index - 1 4706 negate = self._match(TokenType.NOT) 4707 4708 if self._match_text_seq("DISTINCT", "FROM"): 4709 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4710 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4711 4712 if self._match(TokenType.JSON): 4713 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4714 4715 if self._match_text_seq("WITH"): 4716 _with = True 4717 elif self._match_text_seq("WITHOUT"): 4718 _with = False 4719 else: 4720 _with = None 4721 4722 unique = self._match(TokenType.UNIQUE) 4723 self._match_text_seq("KEYS") 4724 expression: t.Optional[exp.Expression] = self.expression( 4725 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4726 ) 4727 else: 4728 expression = self._parse_primary() or self._parse_null() 4729 if not expression: 4730 self._retreat(index) 4731 return None 4732 4733 this = self.expression(exp.Is, this=this, expression=expression) 4734 return self.expression(exp.Not, this=this) if negate else this 4735 4736 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4737 unnest = self._parse_unnest(with_alias=False) 4738 if unnest: 4739 this = self.expression(exp.In, this=this, unnest=unnest) 4740 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4741 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4742 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4743 4744 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4745 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4746 else: 4747 this = self.expression(exp.In, this=this, expressions=expressions) 4748 4749 if matched_l_paren: 4750 self._match_r_paren(this) 4751 elif not self._match(TokenType.R_BRACKET, expression=this): 4752 self.raise_error("Expecting ]") 4753 else: 4754 this = self.expression(exp.In, this=this, field=self._parse_column()) 4755 4756 return this 4757 4758 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4759 low = self._parse_bitwise() 4760 self._match(TokenType.AND) 4761 high = self._parse_bitwise() 4762 return self.expression(exp.Between, this=this, low=low, high=high) 4763 4764 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4765 if not self._match(TokenType.ESCAPE): 4766 return this 4767 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4768 4769 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4770 index = self._index 4771 4772 if not self._match(TokenType.INTERVAL) and match_interval: 4773 return None 4774 4775 if self._match(TokenType.STRING, advance=False): 4776 this = self._parse_primary() 4777 else: 4778 this = self._parse_term() 4779 4780 if not 
this or ( 4781 isinstance(this, exp.Column) 4782 and not this.table 4783 and not this.this.quoted 4784 and this.name.upper() == "IS" 4785 ): 4786 self._retreat(index) 4787 return None 4788 4789 unit = self._parse_function() or ( 4790 not self._match(TokenType.ALIAS, advance=False) 4791 and self._parse_var(any_token=True, upper=True) 4792 ) 4793 4794 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4795 # each INTERVAL expression into this canonical form so it's easy to transpile 4796 if this and this.is_number: 4797 this = exp.Literal.string(this.to_py()) 4798 elif this and this.is_string: 4799 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4800 if parts and unit: 4801 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4802 unit = None 4803 self._retreat(self._index - 1) 4804 4805 if len(parts) == 1: 4806 this = exp.Literal.string(parts[0][0]) 4807 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4808 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4809 unit = self.expression( 4810 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4811 ) 4812 4813 interval = self.expression(exp.Interval, this=this, unit=unit) 4814 4815 index = self._index 4816 self._match(TokenType.PLUS) 4817 4818 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4819 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4820 return self.expression( 4821 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4822 ) 4823 4824 self._retreat(index) 4825 return interval 4826 4827 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4828 this = self._parse_term() 4829 4830 while True: 4831 if self._match_set(self.BITWISE): 4832 this = self.expression( 4833 self.BITWISE[self._prev.token_type], 4834 this=this, 4835 expression=self._parse_term(), 4836 ) 4837 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4838 this = self.expression( 4839 exp.DPipe, 4840 this=this, 4841 expression=self._parse_term(), 4842 safe=not self.dialect.STRICT_STRING_CONCAT, 4843 ) 4844 elif self._match(TokenType.DQMARK): 4845 this = self.expression( 4846 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4847 ) 4848 elif self._match_pair(TokenType.LT, TokenType.LT): 4849 this = self.expression( 4850 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4851 ) 4852 elif self._match_pair(TokenType.GT, TokenType.GT): 4853 this = self.expression( 4854 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4855 ) 4856 else: 4857 break 4858 4859 return this 4860 4861 def _parse_term(self) -> t.Optional[exp.Expression]: 4862 this = self._parse_factor() 4863 4864 while self._match_set(self.TERM): 4865 klass = self.TERM[self._prev.token_type] 4866 comments = self._prev_comments 4867 expression = self._parse_factor() 4868 4869 this = self.expression(klass, this=this, comments=comments, expression=expression) 4870 4871 if isinstance(this, exp.Collate): 4872 expr = this.expression 4873 4874 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 4875 # fallback to Identifier / Var 4876 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4877 ident = expr.this 4878 if isinstance(ident, exp.Identifier): 4879 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4880 4881 return this 4882 4883 def _parse_factor(self) -> t.Optional[exp.Expression]: 4884 parse_method = 
self._parse_exponent if self.EXPONENT else self._parse_unary 4885 this = parse_method() 4886 4887 while self._match_set(self.FACTOR): 4888 klass = self.FACTOR[self._prev.token_type] 4889 comments = self._prev_comments 4890 expression = parse_method() 4891 4892 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4893 self._retreat(self._index - 1) 4894 return this 4895 4896 this = self.expression(klass, this=this, comments=comments, expression=expression) 4897 4898 if isinstance(this, exp.Div): 4899 this.args["typed"] = self.dialect.TYPED_DIVISION 4900 this.args["safe"] = self.dialect.SAFE_DIVISION 4901 4902 return this 4903 4904 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4905 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4906 4907 def _parse_unary(self) -> t.Optional[exp.Expression]: 4908 if self._match_set(self.UNARY_PARSERS): 4909 return self.UNARY_PARSERS[self._prev.token_type](self) 4910 return self._parse_at_time_zone(self._parse_type()) 4911 4912 def _parse_type( 4913 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4914 ) -> t.Optional[exp.Expression]: 4915 interval = parse_interval and self._parse_interval() 4916 if interval: 4917 return interval 4918 4919 index = self._index 4920 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4921 4922 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 4923 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 4924 if isinstance(data_type, exp.Cast): 4925 # This constructor can contain ops directly after it, for instance struct unnesting: 4926 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 4927 return self._parse_column_ops(data_type) 4928 4929 if data_type: 4930 index2 = self._index 4931 this = self._parse_primary() 4932 4933 if isinstance(this, exp.Literal): 4934 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4935 if parser: 4936 return parser(self, this, data_type) 4937 4938 return self.expression(exp.Cast, this=this, to=data_type) 4939 4940 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4941 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4942 # 4943 # If the index difference here is greater than 1, that means the parser itself must have 4944 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4945 # 4946 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4947 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4948 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4949 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 4950 # 4951 # In these cases, we don't really want to return the converted type, but instead retreat 4952 # and try to parse a Column or Identifier in the section below. 
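# Walking through both cases: for the input DECIMAL(38, 0), _parse_types consumes
# all six tokens, so index2 - index == 6 > 1 and the branch below returns the parsed
# type. For a bare DECIMAL under a TYPE_CONVERTERS dialect, only the type keyword is
# consumed (index2 - index == 1) even though expressions was set by the converter, so
# we skip that branch, retreat, and fall through to Column / Identifier parsing.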
4953 if data_type.expressions and index2 - index > 1: 4954 self._retreat(index2) 4955 return self._parse_column_ops(data_type) 4956 4957 self._retreat(index) 4958 4959 if fallback_to_identifier: 4960 return self._parse_id_var() 4961 4962 this = self._parse_column() 4963 return this and self._parse_column_ops(this) 4964 4965 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4966 this = self._parse_type() 4967 if not this: 4968 return None 4969 4970 if isinstance(this, exp.Column) and not this.table: 4971 this = exp.var(this.name.upper()) 4972 4973 return self.expression( 4974 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4975 ) 4976 4977 def _parse_types( 4978 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4979 ) -> t.Optional[exp.Expression]: 4980 index = self._index 4981 4982 this: t.Optional[exp.Expression] = None 4983 prefix = self._match_text_seq("SYSUDTLIB", ".") 4984 4985 if not self._match_set(self.TYPE_TOKENS): 4986 identifier = allow_identifiers and self._parse_id_var( 4987 any_token=False, tokens=(TokenType.VAR,) 4988 ) 4989 if isinstance(identifier, exp.Identifier): 4990 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4991 4992 if len(tokens) != 1: 4993 self.raise_error("Unexpected identifier", self._prev) 4994 4995 if tokens[0].token_type in self.TYPE_TOKENS: 4996 self._prev = tokens[0] 4997 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4998 type_name = identifier.name 4999 5000 while self._match(TokenType.DOT): 5001 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5002 5003 this = exp.DataType.build(type_name, udt=True) 5004 else: 5005 self._retreat(self._index - 1) 5006 return None 5007 else: 5008 return None 5009 5010 type_token = self._prev.token_type 5011 5012 if type_token == TokenType.PSEUDO_TYPE: 5013 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5014 5015 if type_token == TokenType.OBJECT_IDENTIFIER: 5016 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5017 5018 # https://materialize.com/docs/sql/types/map/ 5019 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5020 key_type = self._parse_types( 5021 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5022 ) 5023 if not self._match(TokenType.FARROW): 5024 self._retreat(index) 5025 return None 5026 5027 value_type = self._parse_types( 5028 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5029 ) 5030 if not self._match(TokenType.R_BRACKET): 5031 self._retreat(index) 5032 return None 5033 5034 return exp.DataType( 5035 this=exp.DataType.Type.MAP, 5036 expressions=[key_type, value_type], 5037 nested=True, 5038 prefix=prefix, 5039 ) 5040 5041 nested = type_token in self.NESTED_TYPE_TOKENS 5042 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5043 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5044 expressions = None 5045 maybe_func = False 5046 5047 if self._match(TokenType.L_PAREN): 5048 if is_struct: 5049 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5050 elif nested: 5051 expressions = self._parse_csv( 5052 lambda: self._parse_types( 5053 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5054 ) 5055 ) 5056 if type_token == TokenType.NULLABLE and len(expressions) == 1: 5057 this = expressions[0] 5058 this.set("nullable", True) 5059 self._match_r_paren() 5060 return this 5061 elif type_token in self.ENUM_TYPE_TOKENS: 5062 
expressions = self._parse_csv(self._parse_equality) 5063 elif is_aggregate: 5064 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5065 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5066 ) 5067 if not func_or_ident: 5068 return None 5069 expressions = [func_or_ident] 5070 if self._match(TokenType.COMMA): 5071 expressions.extend( 5072 self._parse_csv( 5073 lambda: self._parse_types( 5074 check_func=check_func, 5075 schema=schema, 5076 allow_identifiers=allow_identifiers, 5077 ) 5078 ) 5079 ) 5080 else: 5081 expressions = self._parse_csv(self._parse_type_size) 5082 5083 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5084 if type_token == TokenType.VECTOR and len(expressions) == 2: 5085 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 5086 5087 if not expressions or not self._match(TokenType.R_PAREN): 5088 self._retreat(index) 5089 return None 5090 5091 maybe_func = True 5092 5093 values: t.Optional[t.List[exp.Expression]] = None 5094 5095 if nested and self._match(TokenType.LT): 5096 if is_struct: 5097 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5098 else: 5099 expressions = self._parse_csv( 5100 lambda: self._parse_types( 5101 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5102 ) 5103 ) 5104 5105 if not self._match(TokenType.GT): 5106 self.raise_error("Expecting >") 5107 5108 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5109 values = self._parse_csv(self._parse_assignment) 5110 if not values and is_struct: 5111 values = None 5112 self._retreat(self._index - 1) 5113 else: 5114 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5115 5116 if type_token in self.TIMESTAMPS: 5117 if self._match_text_seq("WITH", "TIME", "ZONE"): 5118 maybe_func = False 5119 tz_type = ( 5120 exp.DataType.Type.TIMETZ 5121 if type_token in self.TIMES 5122 else exp.DataType.Type.TIMESTAMPTZ 5123 ) 5124 this = exp.DataType(this=tz_type, expressions=expressions) 5125 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5126 maybe_func = False 5127 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5128 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5129 maybe_func = False 5130 elif type_token == TokenType.INTERVAL: 5131 unit = self._parse_var(upper=True) 5132 if unit: 5133 if self._match_text_seq("TO"): 5134 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5135 5136 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5137 else: 5138 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5139 5140 if maybe_func and check_func: 5141 index2 = self._index 5142 peek = self._parse_string() 5143 5144 if not peek: 5145 self._retreat(index) 5146 return None 5147 5148 self._retreat(index2) 5149 5150 if not this: 5151 if self._match_text_seq("UNSIGNED"): 5152 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5153 if not unsigned_type_token: 5154 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5155 5156 type_token = unsigned_type_token or type_token 5157 5158 this = exp.DataType( 5159 this=exp.DataType.Type[type_token.value], 5160 expressions=expressions, 5161 nested=nested, 5162 prefix=prefix, 5163 ) 5164 5165 # Empty arrays/structs are allowed 5166 if values is not None: 5167 cls = exp.Struct if is_struct else exp.Array 5168 this = exp.cast(cls(expressions=values), this, copy=False) 5169 5170 elif 
expressions: 5171 this.set("expressions", expressions) 5172 5173 # https://materialize.com/docs/sql/types/list/#type-name 5174 while self._match(TokenType.LIST): 5175 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5176 5177 index = self._index 5178 5179 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5180 matched_array = self._match(TokenType.ARRAY) 5181 5182 while self._curr: 5183 datatype_token = self._prev.token_type 5184 matched_l_bracket = self._match(TokenType.L_BRACKET) 5185 5186 if (not matched_l_bracket and not matched_array) or ( 5187 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5188 ): 5189 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5190 # not to be confused with the fixed size array parsing 5191 break 5192 5193 matched_array = False 5194 values = self._parse_csv(self._parse_assignment) or None 5195 if ( 5196 values 5197 and not schema 5198 and ( 5199 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5200 ) 5201 ): 5202 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5203 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5204 self._retreat(index) 5205 break 5206 5207 this = exp.DataType( 5208 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5209 ) 5210 self._match(TokenType.R_BRACKET) 5211 5212 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5213 converter = self.TYPE_CONVERTERS.get(this.this) 5214 if converter: 5215 this = converter(t.cast(exp.DataType, this)) 5216 5217 return this 5218 5219 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5220 index = self._index 5221 5222 if ( 5223 self._curr 5224 and self._next 5225 and self._curr.token_type in self.TYPE_TOKENS 5226 and self._next.token_type in self.TYPE_TOKENS 5227 ): 5228 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5229 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5230 this = self._parse_id_var() 5231 else: 5232 this = ( 5233 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5234 or self._parse_id_var() 5235 ) 5236 5237 self._match(TokenType.COLON) 5238 5239 if ( 5240 type_required 5241 and not isinstance(this, exp.DataType) 5242 and not self._match_set(self.TYPE_TOKENS, advance=False) 5243 ): 5244 self._retreat(index) 5245 return self._parse_types() 5246 5247 return self._parse_column_def(this) 5248 5249 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5250 if not self._match_text_seq("AT", "TIME", "ZONE"): 5251 return this 5252 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5253 5254 def _parse_column(self) -> t.Optional[exp.Expression]: 5255 this = self._parse_column_reference() 5256 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5257 5258 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5259 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5260 5261 return column 5262 5263 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5264 this = self._parse_field() 5265 if ( 5266 not this 5267 and self._match(TokenType.VALUES, advance=False) 5268 and self.VALUES_FOLLOWED_BY_PAREN 5269 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5270 ): 5271 this = self._parse_id_var() 5272 5273 if isinstance(this, exp.Identifier): 5274 # We bubble up comments from the Identifier to the Column 5275 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5276 5277 return this 5278 5279 def _parse_colon_as_variant_extract( 5280 self, this: t.Optional[exp.Expression] 5281 ) -> t.Optional[exp.Expression]: 5282 casts = [] 5283 json_path = [] 5284 escape = None 5285 5286 while self._match(TokenType.COLON): 5287 start_index = self._index 5288 5289 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5290 path = self._parse_column_ops( 5291 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5292 ) 5293 5294 # The cast :: operator has a lower precedence than the extraction operator :, so 5295 # we rearrange the AST appropriately to avoid casting the JSON path 5296 while isinstance(path, exp.Cast): 5297 casts.append(path.to) 5298 path = path.this 5299 5300 if casts: 5301 dcolon_offset = next( 5302 i 5303 for i, t in enumerate(self._tokens[start_index:]) 5304 if t.token_type == TokenType.DCOLON 5305 ) 5306 end_token = self._tokens[start_index + dcolon_offset - 1] 5307 else: 5308 end_token = self._prev 5309 5310 if path: 5311 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5312 # it'll roundtrip to a string literal in GET_PATH 5313 if isinstance(path, exp.Identifier) and path.quoted: 5314 escape = True 5315 5316 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5317 5318 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5319 # Databricks transforms it back to the colon/dot notation 5320 if json_path: 5321 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5322 5323 if json_path_expr: 5324 json_path_expr.set("escape", escape) 5325 5326 this = self.expression( 5327 exp.JSONExtract, 5328 this=this, 5329 expression=json_path_expr, 5330 variant_extract=True, 5331 ) 5332 5333 while casts: 5334 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5335 5336 return this 5337 5338 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5339 return self._parse_types() 5340 5341 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5342 this = self._parse_bracket(this) 5343 5344 while self._match_set(self.COLUMN_OPERATORS): 5345 op_token = self._prev.token_type 5346 op = self.COLUMN_OPERATORS.get(op_token) 5347 5348 if op_token in (TokenType.DCOLON, TokenType.DOTCOLON): 5349 field = self._parse_dcolon() 5350 if not field: 5351 self.raise_error("Expected type") 5352 elif op and self._curr: 5353 field = self._parse_column_reference() or self._parse_bracket() 5354 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5355 field = self._parse_column_ops(field) 5356 else: 5357 field = self._parse_field(any_token=True, anonymous_func=True) 5358 5359 if isinstance(field, (exp.Func, exp.Window)) and this: 5360 # BQ & snowflake allow function calls like x.y.count(...), SAFE.SUBSTR(...) 
etc 5361 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5362 this = exp.replace_tree( 5363 this, 5364 lambda n: ( 5365 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5366 if n.table 5367 else n.this 5368 ) 5369 if isinstance(n, exp.Column) 5370 else n, 5371 ) 5372 5373 if op: 5374 this = op(self, this, field) 5375 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5376 this = self.expression( 5377 exp.Column, 5378 comments=this.comments, 5379 this=field, 5380 table=this.this, 5381 db=this.args.get("table"), 5382 catalog=this.args.get("db"), 5383 ) 5384 elif isinstance(field, exp.Window): 5385 # Move the exp.Dot's to the window's function 5386 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5387 field.set("this", window_func) 5388 this = field 5389 else: 5390 this = self.expression(exp.Dot, this=this, expression=field) 5391 5392 if field and field.comments: 5393 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5394 5395 this = self._parse_bracket(this) 5396 5397 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5398 5399 def _parse_primary(self) -> t.Optional[exp.Expression]: 5400 if self._match_set(self.PRIMARY_PARSERS): 5401 token_type = self._prev.token_type 5402 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5403 5404 if token_type == TokenType.STRING: 5405 expressions = [primary] 5406 while self._match(TokenType.STRING): 5407 expressions.append(exp.Literal.string(self._prev.text)) 5408 5409 if len(expressions) > 1: 5410 return self.expression(exp.Concat, expressions=expressions) 5411 5412 return primary 5413 5414 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5415 return exp.Literal.number(f"0.{self._prev.text}") 5416 5417 if self._match(TokenType.L_PAREN): 5418 comments = self._prev_comments 5419 query = self._parse_select() 5420 5421 if query: 5422 expressions = [query] 5423 else: 5424 expressions = self._parse_expressions() 5425 5426 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5427 5428 if not this and self._match(TokenType.R_PAREN, advance=False): 5429 this = self.expression(exp.Tuple) 5430 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5431 this = self._parse_subquery(this=this, parse_alias=False) 5432 elif isinstance(this, exp.Subquery): 5433 this = self._parse_subquery( 5434 this=self._parse_set_operations(this), parse_alias=False 5435 ) 5436 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5437 this = self.expression(exp.Tuple, expressions=expressions) 5438 else: 5439 this = self.expression(exp.Paren, this=this) 5440 5441 if this: 5442 this.add_comments(comments) 5443 5444 self._match_r_paren(expression=this) 5445 return this 5446 5447 return None 5448 5449 def _parse_field( 5450 self, 5451 any_token: bool = False, 5452 tokens: t.Optional[t.Collection[TokenType]] = None, 5453 anonymous_func: bool = False, 5454 ) -> t.Optional[exp.Expression]: 5455 if anonymous_func: 5456 field = ( 5457 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5458 or self._parse_primary() 5459 ) 5460 else: 5461 field = self._parse_primary() or self._parse_function( 5462 anonymous=anonymous_func, any_token=any_token 5463 ) 5464 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5465 5466 def _parse_function( 5467 self, 5468 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5469 anonymous: bool = False, 5470 optional_parens: 
bool = True, 5471 any_token: bool = False, 5472 ) -> t.Optional[exp.Expression]: 5473 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5474 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5475 fn_syntax = False 5476 if ( 5477 self._match(TokenType.L_BRACE, advance=False) 5478 and self._next 5479 and self._next.text.upper() == "FN" 5480 ): 5481 self._advance(2) 5482 fn_syntax = True 5483 5484 func = self._parse_function_call( 5485 functions=functions, 5486 anonymous=anonymous, 5487 optional_parens=optional_parens, 5488 any_token=any_token, 5489 ) 5490 5491 if fn_syntax: 5492 self._match(TokenType.R_BRACE) 5493 5494 return func 5495 5496 def _parse_function_call( 5497 self, 5498 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5499 anonymous: bool = False, 5500 optional_parens: bool = True, 5501 any_token: bool = False, 5502 ) -> t.Optional[exp.Expression]: 5503 if not self._curr: 5504 return None 5505 5506 comments = self._curr.comments 5507 token_type = self._curr.token_type 5508 this = self._curr.text 5509 upper = this.upper() 5510 5511 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5512 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5513 self._advance() 5514 return self._parse_window(parser(self)) 5515 5516 if not self._next or self._next.token_type != TokenType.L_PAREN: 5517 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5518 self._advance() 5519 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5520 5521 return None 5522 5523 if any_token: 5524 if token_type in self.RESERVED_TOKENS: 5525 return None 5526 elif token_type not in self.FUNC_TOKENS: 5527 return None 5528 5529 self._advance(2) 5530 5531 parser = self.FUNCTION_PARSERS.get(upper) 5532 if parser and not anonymous: 5533 this = parser(self) 5534 else: 5535 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5536 5537 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5538 this = self.expression( 5539 subquery_predicate, comments=comments, this=self._parse_select() 5540 ) 5541 self._match_r_paren() 5542 return this 5543 5544 if functions is None: 5545 functions = self.FUNCTIONS 5546 5547 function = functions.get(upper) 5548 known_function = function and not anonymous 5549 5550 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5551 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5552 5553 post_func_comments = self._curr and self._curr.comments 5554 if known_function and post_func_comments: 5555 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5556 # call we'll construct it as exp.Anonymous, even if it's "known" 5557 if any( 5558 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5559 for comment in post_func_comments 5560 ): 5561 known_function = False 5562 5563 if alias and known_function: 5564 args = self._kv_to_prop_eq(args) 5565 5566 if known_function: 5567 func_builder = t.cast(t.Callable, function) 5568 5569 if "dialect" in func_builder.__code__.co_varnames: 5570 func = func_builder(args, dialect=self.dialect) 5571 else: 5572 func = func_builder(args) 5573 5574 func = self.validate_expression(func, args) 5575 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5576 func.meta["name"] = this 5577 5578 this = func 5579 else: 5580 if token_type == TokenType.IDENTIFIER: 5581 this = exp.Identifier(this=this, quoted=True) 5582 this = self.expression(exp.Anonymous, this=this, expressions=args) 5583 
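# --- Illustrative sketch, not part of the original source -----------------
# The branch above decides between a typed function node and exp.Anonymous,
# and the "/* sqlglot.anonymous */" escape hatch it implements is observable
# through the public API. Assuming a recent sqlglot with parse_one, and with
# t/x as placeholder names:

import sqlglot
from sqlglot import exp

known = sqlglot.parse_one("SELECT SUM(x) FROM t")
# a known function parses into its typed expression
assert known.find(exp.Sum) is not None

demoted = sqlglot.parse_one("SELECT SUM(x) /* sqlglot.anonymous */ FROM t")
# the comment directly after the call is expected to keep SUM as
# exp.Anonymous instead of exp.Sum
# ---------------------------------------------------------------------------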
5584 if isinstance(this, exp.Expression): 5585 this.add_comments(comments) 5586 5587 self._match_r_paren(this) 5588 return self._parse_window(this) 5589 5590 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5591 return expression 5592 5593 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5594 transformed = [] 5595 5596 for index, e in enumerate(expressions): 5597 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5598 if isinstance(e, exp.Alias): 5599 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5600 5601 if not isinstance(e, exp.PropertyEQ): 5602 e = self.expression( 5603 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5604 ) 5605 5606 if isinstance(e.this, exp.Column): 5607 e.this.replace(e.this.this) 5608 else: 5609 e = self._to_prop_eq(e, index) 5610 5611 transformed.append(e) 5612 5613 return transformed 5614 5615 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5616 return self._parse_statement() 5617 5618 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5619 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5620 5621 def _parse_user_defined_function( 5622 self, kind: t.Optional[TokenType] = None 5623 ) -> t.Optional[exp.Expression]: 5624 this = self._parse_table_parts(schema=True) 5625 5626 if not self._match(TokenType.L_PAREN): 5627 return this 5628 5629 expressions = self._parse_csv(self._parse_function_parameter) 5630 self._match_r_paren() 5631 return self.expression( 5632 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5633 ) 5634 5635 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5636 literal = self._parse_primary() 5637 if literal: 5638 return self.expression(exp.Introducer, this=token.text, expression=literal) 5639 5640 return self.expression(exp.Identifier, this=token.text) 5641 5642 def _parse_session_parameter(self) -> exp.SessionParameter: 5643 kind = None 5644 this = self._parse_id_var() or self._parse_primary() 5645 5646 if this and self._match(TokenType.DOT): 5647 kind = this.name 5648 this = self._parse_var() or self._parse_primary() 5649 5650 return self.expression(exp.SessionParameter, this=this, kind=kind) 5651 5652 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5653 return self._parse_id_var() 5654 5655 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5656 index = self._index 5657 5658 if self._match(TokenType.L_PAREN): 5659 expressions = t.cast( 5660 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5661 ) 5662 5663 if not self._match(TokenType.R_PAREN): 5664 self._retreat(index) 5665 else: 5666 expressions = [self._parse_lambda_arg()] 5667 5668 if self._match_set(self.LAMBDAS): 5669 return self.LAMBDAS[self._prev.token_type](self, expressions) 5670 5671 self._retreat(index) 5672 5673 this: t.Optional[exp.Expression] 5674 5675 if self._match(TokenType.DISTINCT): 5676 this = self.expression( 5677 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5678 ) 5679 else: 5680 this = self._parse_select_or_expression(alias=alias) 5681 5682 return self._parse_limit( 5683 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5684 ) 5685 5686 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5687 index = self._index 5688 if not 
self._match(TokenType.L_PAREN): 5689 return this 5690 5691 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 5692 # expr can be of both types 5693 if self._match_set(self.SELECT_START_TOKENS): 5694 self._retreat(index) 5695 return this 5696 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5697 self._match_r_paren() 5698 return self.expression(exp.Schema, this=this, expressions=args) 5699 5700 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5701 return self._parse_column_def(self._parse_field(any_token=True)) 5702 5703 def _parse_column_def( 5704 self, this: t.Optional[exp.Expression], computed_column: bool = True 5705 ) -> t.Optional[exp.Expression]: 5706 # column defs are not really columns, they're identifiers 5707 if isinstance(this, exp.Column): 5708 this = this.this 5709 5710 if not computed_column: 5711 self._match(TokenType.ALIAS) 5712 5713 kind = self._parse_types(schema=True) 5714 5715 if self._match_text_seq("FOR", "ORDINALITY"): 5716 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5717 5718 constraints: t.List[exp.Expression] = [] 5719 5720 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5721 ("ALIAS", "MATERIALIZED") 5722 ): 5723 persisted = self._prev.text.upper() == "MATERIALIZED" 5724 constraint_kind = exp.ComputedColumnConstraint( 5725 this=self._parse_assignment(), 5726 persisted=persisted or self._match_text_seq("PERSISTED"), 5727 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5728 ) 5729 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5730 elif ( 5731 kind 5732 and self._match(TokenType.ALIAS, advance=False) 5733 and ( 5734 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 5735 or (self._next and self._next.token_type == TokenType.L_PAREN) 5736 ) 5737 ): 5738 self._advance() 5739 constraints.append( 5740 self.expression( 5741 exp.ColumnConstraint, 5742 kind=exp.TransformColumnConstraint(this=self._parse_disjunction()), 5743 ) 5744 ) 5745 5746 while True: 5747 constraint = self._parse_column_constraint() 5748 if not constraint: 5749 break 5750 constraints.append(constraint) 5751 5752 if not kind and not constraints: 5753 return this 5754 5755 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5756 5757 def _parse_auto_increment( 5758 self, 5759 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5760 start = None 5761 increment = None 5762 5763 if self._match(TokenType.L_PAREN, advance=False): 5764 args = self._parse_wrapped_csv(self._parse_bitwise) 5765 start = seq_get(args, 0) 5766 increment = seq_get(args, 1) 5767 elif self._match_text_seq("START"): 5768 start = self._parse_bitwise() 5769 self._match_text_seq("INCREMENT") 5770 increment = self._parse_bitwise() 5771 5772 if start and increment: 5773 return exp.GeneratedAsIdentityColumnConstraint( 5774 start=start, increment=increment, this=False 5775 ) 5776 5777 return exp.AutoIncrementColumnConstraint() 5778 5779 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5780 if not self._match_text_seq("REFRESH"): 5781 self._retreat(self._index - 1) 5782 return None 5783 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5784 5785 def _parse_compress(self) -> exp.CompressColumnConstraint: 5786 if self._match(TokenType.L_PAREN, advance=False): 5787 return self.expression( 5788 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5789 
) 5790 5791 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5792 5793 def _parse_generated_as_identity( 5794 self, 5795 ) -> ( 5796 exp.GeneratedAsIdentityColumnConstraint 5797 | exp.ComputedColumnConstraint 5798 | exp.GeneratedAsRowColumnConstraint 5799 ): 5800 if self._match_text_seq("BY", "DEFAULT"): 5801 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5802 this = self.expression( 5803 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5804 ) 5805 else: 5806 self._match_text_seq("ALWAYS") 5807 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5808 5809 self._match(TokenType.ALIAS) 5810 5811 if self._match_text_seq("ROW"): 5812 start = self._match_text_seq("START") 5813 if not start: 5814 self._match(TokenType.END) 5815 hidden = self._match_text_seq("HIDDEN") 5816 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5817 5818 identity = self._match_text_seq("IDENTITY") 5819 5820 if self._match(TokenType.L_PAREN): 5821 if self._match(TokenType.START_WITH): 5822 this.set("start", self._parse_bitwise()) 5823 if self._match_text_seq("INCREMENT", "BY"): 5824 this.set("increment", self._parse_bitwise()) 5825 if self._match_text_seq("MINVALUE"): 5826 this.set("minvalue", self._parse_bitwise()) 5827 if self._match_text_seq("MAXVALUE"): 5828 this.set("maxvalue", self._parse_bitwise()) 5829 5830 if self._match_text_seq("CYCLE"): 5831 this.set("cycle", True) 5832 elif self._match_text_seq("NO", "CYCLE"): 5833 this.set("cycle", False) 5834 5835 if not identity: 5836 this.set("expression", self._parse_range()) 5837 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5838 args = self._parse_csv(self._parse_bitwise) 5839 this.set("start", seq_get(args, 0)) 5840 this.set("increment", seq_get(args, 1)) 5841 5842 self._match_r_paren() 5843 5844 return this 5845 5846 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5847 self._match_text_seq("LENGTH") 5848 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5849 5850 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5851 if self._match_text_seq("NULL"): 5852 return self.expression(exp.NotNullColumnConstraint) 5853 if self._match_text_seq("CASESPECIFIC"): 5854 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5855 if self._match_text_seq("FOR", "REPLICATION"): 5856 return self.expression(exp.NotForReplicationColumnConstraint) 5857 5858 # Unconsume the `NOT` token 5859 self._retreat(self._index - 1) 5860 return None 5861 5862 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5863 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 5864 5865 procedure_option_follows = ( 5866 self._match(TokenType.WITH, advance=False) 5867 and self._next 5868 and self._next.text.upper() in self.PROCEDURE_OPTIONS 5869 ) 5870 5871 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 5872 return self.expression( 5873 exp.ColumnConstraint, 5874 this=this, 5875 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5876 ) 5877 5878 return this 5879 5880 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5881 if not self._match(TokenType.CONSTRAINT): 5882 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5883 5884 return self.expression( 5885 exp.Constraint, 5886 this=self._parse_id_var(), 5887 expressions=self._parse_unnamed_constraints(), 5888 ) 5889 5890 def 
_parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5891 constraints = [] 5892 while True: 5893 constraint = self._parse_unnamed_constraint() or self._parse_function() 5894 if not constraint: 5895 break 5896 constraints.append(constraint) 5897 5898 return constraints 5899 5900 def _parse_unnamed_constraint( 5901 self, constraints: t.Optional[t.Collection[str]] = None 5902 ) -> t.Optional[exp.Expression]: 5903 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5904 constraints or self.CONSTRAINT_PARSERS 5905 ): 5906 return None 5907 5908 constraint = self._prev.text.upper() 5909 if constraint not in self.CONSTRAINT_PARSERS: 5910 self.raise_error(f"No parser found for schema constraint {constraint}.") 5911 5912 return self.CONSTRAINT_PARSERS[constraint](self) 5913 5914 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5915 return self._parse_id_var(any_token=False) 5916 5917 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5918 self._match_text_seq("KEY") 5919 return self.expression( 5920 exp.UniqueColumnConstraint, 5921 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5922 this=self._parse_schema(self._parse_unique_key()), 5923 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5924 on_conflict=self._parse_on_conflict(), 5925 ) 5926 5927 def _parse_key_constraint_options(self) -> t.List[str]: 5928 options = [] 5929 while True: 5930 if not self._curr: 5931 break 5932 5933 if self._match(TokenType.ON): 5934 action = None 5935 on = self._advance_any() and self._prev.text 5936 5937 if self._match_text_seq("NO", "ACTION"): 5938 action = "NO ACTION" 5939 elif self._match_text_seq("CASCADE"): 5940 action = "CASCADE" 5941 elif self._match_text_seq("RESTRICT"): 5942 action = "RESTRICT" 5943 elif self._match_pair(TokenType.SET, TokenType.NULL): 5944 action = "SET NULL" 5945 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5946 action = "SET DEFAULT" 5947 else: 5948 self.raise_error("Invalid key constraint") 5949 5950 options.append(f"ON {on} {action}") 5951 else: 5952 var = self._parse_var_from_options( 5953 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5954 ) 5955 if not var: 5956 break 5957 options.append(var.name) 5958 5959 return options 5960 5961 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5962 if match and not self._match(TokenType.REFERENCES): 5963 return None 5964 5965 expressions = None 5966 this = self._parse_table(schema=True) 5967 options = self._parse_key_constraint_options() 5968 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5969 5970 def _parse_foreign_key(self) -> exp.ForeignKey: 5971 expressions = self._parse_wrapped_id_vars() 5972 reference = self._parse_references() 5973 options = {} 5974 5975 while self._match(TokenType.ON): 5976 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5977 self.raise_error("Expected DELETE or UPDATE") 5978 5979 kind = self._prev.text.lower() 5980 5981 if self._match_text_seq("NO", "ACTION"): 5982 action = "NO ACTION" 5983 elif self._match(TokenType.SET): 5984 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5985 action = "SET " + self._prev.text.upper() 5986 else: 5987 self._advance() 5988 action = self._prev.text.upper() 5989 5990 options[kind] = action 5991 5992 return self.expression( 5993 exp.ForeignKey, 5994 expressions=expressions, 5995 reference=reference, 5996 **options, # type: ignore 5997 ) 5998 5999 def _parse_primary_key_part(self) -> 
t.Optional[exp.Expression]: 6000 return self._parse_ordered() or self._parse_field() 6001 6002 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 6003 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 6004 self._retreat(self._index - 1) 6005 return None 6006 6007 id_vars = self._parse_wrapped_id_vars() 6008 return self.expression( 6009 exp.PeriodForSystemTimeConstraint, 6010 this=seq_get(id_vars, 0), 6011 expression=seq_get(id_vars, 1), 6012 ) 6013 6014 def _parse_primary_key( 6015 self, wrapped_optional: bool = False, in_props: bool = False 6016 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 6017 desc = ( 6018 self._match_set((TokenType.ASC, TokenType.DESC)) 6019 and self._prev.token_type == TokenType.DESC 6020 ) 6021 6022 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 6023 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 6024 6025 expressions = self._parse_wrapped_csv( 6026 self._parse_primary_key_part, optional=wrapped_optional 6027 ) 6028 options = self._parse_key_constraint_options() 6029 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 6030 6031 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6032 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 6033 6034 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6035 """ 6036 Parses a datetime column in ODBC format. We parse the column into the corresponding 6037 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 6038 same as we did for `DATE('yyyy-mm-dd')`. 6039 6040 Reference: 6041 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6042 """ 6043 self._match(TokenType.VAR) 6044 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6045 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6046 if not self._match(TokenType.R_BRACE): 6047 self.raise_error("Expected }") 6048 return expression 6049 6050 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6051 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6052 return this 6053 6054 bracket_kind = self._prev.token_type 6055 if ( 6056 bracket_kind == TokenType.L_BRACE 6057 and self._curr 6058 and self._curr.token_type == TokenType.VAR 6059 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6060 ): 6061 return self._parse_odbc_datetime_literal() 6062 6063 expressions = self._parse_csv( 6064 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6065 ) 6066 6067 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6068 self.raise_error("Expected ]") 6069 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6070 self.raise_error("Expected }") 6071 6072 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6073 if bracket_kind == TokenType.L_BRACE: 6074 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 6075 elif not this: 6076 this = build_array_constructor( 6077 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6078 ) 6079 else: 6080 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6081 if constructor_type: 6082 return build_array_constructor( 6083 constructor_type, 6084 args=expressions, 6085 bracket_kind=bracket_kind, 6086 dialect=self.dialect, 6087 
) 6088 6089 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 6090 this = self.expression(exp.Bracket, this=this, expressions=expressions) 6091 6092 self._add_comments(this) 6093 return self._parse_bracket(this) 6094 6095 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6096 if self._match(TokenType.COLON): 6097 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6098 return this 6099 6100 def _parse_case(self) -> t.Optional[exp.Expression]: 6101 ifs = [] 6102 default = None 6103 6104 comments = self._prev_comments 6105 expression = self._parse_assignment() 6106 6107 while self._match(TokenType.WHEN): 6108 this = self._parse_assignment() 6109 self._match(TokenType.THEN) 6110 then = self._parse_assignment() 6111 ifs.append(self.expression(exp.If, this=this, true=then)) 6112 6113 if self._match(TokenType.ELSE): 6114 default = self._parse_assignment() 6115 6116 if not self._match(TokenType.END): 6117 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6118 default = exp.column("interval") 6119 else: 6120 self.raise_error("Expected END after CASE", self._prev) 6121 6122 return self.expression( 6123 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6124 ) 6125 6126 def _parse_if(self) -> t.Optional[exp.Expression]: 6127 if self._match(TokenType.L_PAREN): 6128 args = self._parse_csv(self._parse_assignment) 6129 this = self.validate_expression(exp.If.from_arg_list(args), args) 6130 self._match_r_paren() 6131 else: 6132 index = self._index - 1 6133 6134 if self.NO_PAREN_IF_COMMANDS and index == 0: 6135 return self._parse_as_command(self._prev) 6136 6137 condition = self._parse_assignment() 6138 6139 if not condition: 6140 self._retreat(index) 6141 return None 6142 6143 self._match(TokenType.THEN) 6144 true = self._parse_assignment() 6145 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6146 self._match(TokenType.END) 6147 this = self.expression(exp.If, this=condition, true=true, false=false) 6148 6149 return this 6150 6151 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6152 if not self._match_text_seq("VALUE", "FOR"): 6153 self._retreat(self._index - 1) 6154 return None 6155 6156 return self.expression( 6157 exp.NextValueFor, 6158 this=self._parse_column(), 6159 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6160 ) 6161 6162 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6163 this = self._parse_function() or self._parse_var_or_string(upper=True) 6164 6165 if self._match(TokenType.FROM): 6166 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6167 6168 if not self._match(TokenType.COMMA): 6169 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6170 6171 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6172 6173 def _parse_gap_fill(self) -> exp.GapFill: 6174 self._match(TokenType.TABLE) 6175 this = self._parse_table() 6176 6177 self._match(TokenType.COMMA) 6178 args = [this, *self._parse_csv(self._parse_lambda)] 6179 6180 gap_fill = exp.GapFill.from_arg_list(args) 6181 return self.validate_expression(gap_fill, args) 6182 6183 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6184 this = self._parse_assignment() 6185 6186 if not self._match(TokenType.ALIAS): 6187 if self._match(TokenType.COMMA): 6188 return self.expression(exp.CastToStrType, this=this, 
to=self._parse_string()) 6189 6190 self.raise_error("Expected AS after CAST") 6191 6192 fmt = None 6193 to = self._parse_types() 6194 6195 default = self._match(TokenType.DEFAULT) 6196 if default: 6197 default = self._parse_bitwise() 6198 self._match_text_seq("ON", "CONVERSION", "ERROR") 6199 6200 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6201 fmt_string = self._parse_string() 6202 fmt = self._parse_at_time_zone(fmt_string) 6203 6204 if not to: 6205 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6206 if to.this in exp.DataType.TEMPORAL_TYPES: 6207 this = self.expression( 6208 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6209 this=this, 6210 format=exp.Literal.string( 6211 format_time( 6212 fmt_string.this if fmt_string else "", 6213 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6214 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6215 ) 6216 ), 6217 safe=safe, 6218 ) 6219 6220 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6221 this.set("zone", fmt.args["zone"]) 6222 return this 6223 elif not to: 6224 self.raise_error("Expected TYPE after CAST") 6225 elif isinstance(to, exp.Identifier): 6226 to = exp.DataType.build(to.name, udt=True) 6227 elif to.this == exp.DataType.Type.CHAR: 6228 if self._match(TokenType.CHARACTER_SET): 6229 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6230 6231 return self.expression( 6232 exp.Cast if strict else exp.TryCast, 6233 this=this, 6234 to=to, 6235 format=fmt, 6236 safe=safe, 6237 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6238 default=default, 6239 ) 6240 6241 def _parse_string_agg(self) -> exp.GroupConcat: 6242 if self._match(TokenType.DISTINCT): 6243 args: t.List[t.Optional[exp.Expression]] = [ 6244 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6245 ] 6246 if self._match(TokenType.COMMA): 6247 args.extend(self._parse_csv(self._parse_assignment)) 6248 else: 6249 args = self._parse_csv(self._parse_assignment) # type: ignore 6250 6251 if self._match_text_seq("ON", "OVERFLOW"): 6252 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6253 if self._match_text_seq("ERROR"): 6254 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6255 else: 6256 self._match_text_seq("TRUNCATE") 6257 on_overflow = self.expression( 6258 exp.OverflowTruncateBehavior, 6259 this=self._parse_string(), 6260 with_count=( 6261 self._match_text_seq("WITH", "COUNT") 6262 or not self._match_text_seq("WITHOUT", "COUNT") 6263 ), 6264 ) 6265 else: 6266 on_overflow = None 6267 6268 index = self._index 6269 if not self._match(TokenType.R_PAREN) and args: 6270 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6271 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6272 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6273 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6274 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6275 6276 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6277 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6278 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
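# --- Illustrative sketch, not part of the original source -----------------
# Both the separator form handled above and the WITHIN GROUP form handled
# below funnel into exp.GroupConcat, which is what makes transpilation
# between dialects mechanical. Assuming a recent sqlglot, with t/x/y as
# placeholder names:

import sqlglot

out = sqlglot.transpile(
    "SELECT STRING_AGG(x, ',' ORDER BY y) FROM t",
    read="postgres",
    write="mysql",
)[0]
# expected shape: SELECT GROUP_CONCAT(x ORDER BY y SEPARATOR ',') FROM t
# ---------------------------------------------------------------------------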
6279 if not self._match_text_seq("WITHIN", "GROUP"): 6280 self._retreat(index) 6281 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6282 6283 # The corresponding match_r_paren will be called in parse_function (caller) 6284 self._match_l_paren() 6285 6286 return self.expression( 6287 exp.GroupConcat, 6288 this=self._parse_order(this=seq_get(args, 0)), 6289 separator=seq_get(args, 1), 6290 on_overflow=on_overflow, 6291 ) 6292 6293 def _parse_convert( 6294 self, strict: bool, safe: t.Optional[bool] = None 6295 ) -> t.Optional[exp.Expression]: 6296 this = self._parse_bitwise() 6297 6298 if self._match(TokenType.USING): 6299 to: t.Optional[exp.Expression] = self.expression( 6300 exp.CharacterSet, this=self._parse_var() 6301 ) 6302 elif self._match(TokenType.COMMA): 6303 to = self._parse_types() 6304 else: 6305 to = None 6306 6307 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 6308 6309 def _parse_xml_table(self) -> exp.XMLTable: 6310 namespaces = None 6311 passing = None 6312 columns = None 6313 6314 if self._match_text_seq("XMLNAMESPACES", "("): 6315 namespaces = self._parse_xml_namespace() 6316 self._match_text_seq(")", ",") 6317 6318 this = self._parse_string() 6319 6320 if self._match_text_seq("PASSING"): 6321 # The BY VALUE keywords are optional and are provided for semantic clarity 6322 self._match_text_seq("BY", "VALUE") 6323 passing = self._parse_csv(self._parse_column) 6324 6325 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6326 6327 if self._match_text_seq("COLUMNS"): 6328 columns = self._parse_csv(self._parse_field_def) 6329 6330 return self.expression( 6331 exp.XMLTable, 6332 this=this, 6333 namespaces=namespaces, 6334 passing=passing, 6335 columns=columns, 6336 by_ref=by_ref, 6337 ) 6338 6339 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6340 namespaces = [] 6341 6342 while True: 6343 if self._match(TokenType.DEFAULT): 6344 uri = self._parse_string() 6345 else: 6346 uri = self._parse_alias(self._parse_string()) 6347 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6348 if not self._match(TokenType.COMMA): 6349 break 6350 6351 return namespaces 6352 6353 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 6354 """ 6355 There are generally two variants of the DECODE function: 6356 6357 - DECODE(bin, charset) 6358 - DECODE(expression, search, result [, search, result] ... [, default]) 6359 6360 The second variant will always be parsed into a CASE expression. Note that NULL 6361 needs special treatment, since we need to explicitly check for it with `IS NULL`, 6362 instead of relying on pattern matching. 
6363 """ 6364 args = self._parse_csv(self._parse_assignment) 6365 6366 if len(args) < 3: 6367 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6368 6369 expression, *expressions = args 6370 if not expression: 6371 return None 6372 6373 ifs = [] 6374 for search, result in zip(expressions[::2], expressions[1::2]): 6375 if not search or not result: 6376 return None 6377 6378 if isinstance(search, exp.Literal): 6379 ifs.append( 6380 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 6381 ) 6382 elif isinstance(search, exp.Null): 6383 ifs.append( 6384 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 6385 ) 6386 else: 6387 cond = exp.or_( 6388 exp.EQ(this=expression.copy(), expression=search), 6389 exp.and_( 6390 exp.Is(this=expression.copy(), expression=exp.Null()), 6391 exp.Is(this=search.copy(), expression=exp.Null()), 6392 copy=False, 6393 ), 6394 copy=False, 6395 ) 6396 ifs.append(exp.If(this=cond, true=result)) 6397 6398 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6399 6400 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6401 self._match_text_seq("KEY") 6402 key = self._parse_column() 6403 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6404 self._match_text_seq("VALUE") 6405 value = self._parse_bitwise() 6406 6407 if not key and not value: 6408 return None 6409 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6410 6411 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6412 if not this or not self._match_text_seq("FORMAT", "JSON"): 6413 return this 6414 6415 return self.expression(exp.FormatJson, this=this) 6416 6417 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6418 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6419 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6420 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6421 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6422 else: 6423 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6424 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6425 6426 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6427 6428 if not empty and not error and not null: 6429 return None 6430 6431 return self.expression( 6432 exp.OnCondition, 6433 empty=empty, 6434 error=error, 6435 null=null, 6436 ) 6437 6438 def _parse_on_handling( 6439 self, on: str, *values: str 6440 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6441 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6442 for value in values: 6443 if self._match_text_seq(value, "ON", on): 6444 return f"{value} ON {on}" 6445 6446 index = self._index 6447 if self._match(TokenType.DEFAULT): 6448 default_value = self._parse_bitwise() 6449 if self._match_text_seq("ON", on): 6450 return default_value 6451 6452 self._retreat(index) 6453 6454 return None 6455 6456 @t.overload 6457 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6458 6459 @t.overload 6460 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
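# --- Illustrative sketch, not part of the original source -----------------
# _parse_json_object below covers both the scalar and the aggregate variant;
# the KEY/VALUE pairs and the NULL-handling clause it parses can be
# exercised through the public API. Assuming a recent sqlglot:

import sqlglot
from sqlglot import exp

node = sqlglot.parse_one("SELECT JSON_OBJECT('a' VALUE 1 ABSENT ON NULL)")
obj = node.find(exp.JSONObject)
# expected: one exp.JSONKeyValue expression and a null_handling of
# "ABSENT ON NULL" on the JSONObject node
# ---------------------------------------------------------------------------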
6461 6462 def _parse_json_object(self, agg=False): 6463 star = self._parse_star() 6464 expressions = ( 6465 [star] 6466 if star 6467 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6468 ) 6469 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6470 6471 unique_keys = None 6472 if self._match_text_seq("WITH", "UNIQUE"): 6473 unique_keys = True 6474 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6475 unique_keys = False 6476 6477 self._match_text_seq("KEYS") 6478 6479 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6480 self._parse_type() 6481 ) 6482 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6483 6484 return self.expression( 6485 exp.JSONObjectAgg if agg else exp.JSONObject, 6486 expressions=expressions, 6487 null_handling=null_handling, 6488 unique_keys=unique_keys, 6489 return_type=return_type, 6490 encoding=encoding, 6491 ) 6492 6493 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6494 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6495 if not self._match_text_seq("NESTED"): 6496 this = self._parse_id_var() 6497 kind = self._parse_types(allow_identifiers=False) 6498 nested = None 6499 else: 6500 this = None 6501 kind = None 6502 nested = True 6503 6504 path = self._match_text_seq("PATH") and self._parse_string() 6505 nested_schema = nested and self._parse_json_schema() 6506 6507 return self.expression( 6508 exp.JSONColumnDef, 6509 this=this, 6510 kind=kind, 6511 path=path, 6512 nested_schema=nested_schema, 6513 ) 6514 6515 def _parse_json_schema(self) -> exp.JSONSchema: 6516 self._match_text_seq("COLUMNS") 6517 return self.expression( 6518 exp.JSONSchema, 6519 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6520 ) 6521 6522 def _parse_json_table(self) -> exp.JSONTable: 6523 this = self._parse_format_json(self._parse_bitwise()) 6524 path = self._match(TokenType.COMMA) and self._parse_string() 6525 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6526 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6527 schema = self._parse_json_schema() 6528 6529 return exp.JSONTable( 6530 this=this, 6531 schema=schema, 6532 path=path, 6533 error_handling=error_handling, 6534 empty_handling=empty_handling, 6535 ) 6536 6537 def _parse_match_against(self) -> exp.MatchAgainst: 6538 expressions = self._parse_csv(self._parse_column) 6539 6540 self._match_text_seq(")", "AGAINST", "(") 6541 6542 this = self._parse_string() 6543 6544 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6545 modifier = "IN NATURAL LANGUAGE MODE" 6546 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6547 modifier = f"{modifier} WITH QUERY EXPANSION" 6548 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6549 modifier = "IN BOOLEAN MODE" 6550 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6551 modifier = "WITH QUERY EXPANSION" 6552 else: 6553 modifier = None 6554 6555 return self.expression( 6556 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6557 ) 6558 6559 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6560 def _parse_open_json(self) -> exp.OpenJSON: 6561 this = self._parse_bitwise() 6562 path = self._match(TokenType.COMMA) and self._parse_string() 6563 6564 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6565 this = self._parse_field(any_token=True) 6566 kind = self._parse_types() 6567 path = 
self._parse_string() 6568 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6569 6570 return self.expression( 6571 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6572 ) 6573 6574 expressions = None 6575 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6576 self._match_l_paren() 6577 expressions = self._parse_csv(_parse_open_json_column_def) 6578 6579 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6580 6581 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6582 args = self._parse_csv(self._parse_bitwise) 6583 6584 if self._match(TokenType.IN): 6585 return self.expression( 6586 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6587 ) 6588 6589 if haystack_first: 6590 haystack = seq_get(args, 0) 6591 needle = seq_get(args, 1) 6592 else: 6593 haystack = seq_get(args, 1) 6594 needle = seq_get(args, 0) 6595 6596 return self.expression( 6597 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6598 ) 6599 6600 def _parse_predict(self) -> exp.Predict: 6601 self._match_text_seq("MODEL") 6602 this = self._parse_table() 6603 6604 self._match(TokenType.COMMA) 6605 self._match_text_seq("TABLE") 6606 6607 return self.expression( 6608 exp.Predict, 6609 this=this, 6610 expression=self._parse_table(), 6611 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6612 ) 6613 6614 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6615 args = self._parse_csv(self._parse_table) 6616 return exp.JoinHint(this=func_name.upper(), expressions=args) 6617 6618 def _parse_substring(self) -> exp.Substring: 6619 # Postgres supports the form: substring(string [from int] [for int]) 6620 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6621 6622 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6623 6624 if self._match(TokenType.FROM): 6625 args.append(self._parse_bitwise()) 6626 if self._match(TokenType.FOR): 6627 if len(args) == 1: 6628 args.append(exp.Literal.number(1)) 6629 args.append(self._parse_bitwise()) 6630 6631 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6632 6633 def _parse_trim(self) -> exp.Trim: 6634 # https://www.w3resource.com/sql/character-functions/trim.php 6635 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6636 6637 position = None 6638 collation = None 6639 expression = None 6640 6641 if self._match_texts(self.TRIM_TYPES): 6642 position = self._prev.text.upper() 6643 6644 this = self._parse_bitwise() 6645 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6646 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6647 expression = self._parse_bitwise() 6648 6649 if invert_order: 6650 this, expression = expression, this 6651 6652 if self._match(TokenType.COLLATE): 6653 collation = self._parse_bitwise() 6654 6655 return self.expression( 6656 exp.Trim, this=this, position=position, expression=expression, collation=collation 6657 ) 6658 6659 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6660 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6661 6662 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6663 return self._parse_window(self._parse_id_var(), alias=True) 6664 6665 def _parse_respect_or_ignore_nulls( 6666 self, this: t.Optional[exp.Expression] 6667 ) -> t.Optional[exp.Expression]: 6668 if self._match_text_seq("IGNORE", "NULLS"): 
6669 return self.expression(exp.IgnoreNulls, this=this) 6670 if self._match_text_seq("RESPECT", "NULLS"): 6671 return self.expression(exp.RespectNulls, this=this) 6672 return this 6673 6674 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6675 if self._match(TokenType.HAVING): 6676 self._match_texts(("MAX", "MIN")) 6677 max = self._prev.text.upper() != "MIN" 6678 return self.expression( 6679 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6680 ) 6681 6682 return this 6683 6684 def _parse_window( 6685 self, this: t.Optional[exp.Expression], alias: bool = False 6686 ) -> t.Optional[exp.Expression]: 6687 func = this 6688 comments = func.comments if isinstance(func, exp.Expression) else None 6689 6690 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6691 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6692 if self._match_text_seq("WITHIN", "GROUP"): 6693 order = self._parse_wrapped(self._parse_order) 6694 this = self.expression(exp.WithinGroup, this=this, expression=order) 6695 6696 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6697 self._match(TokenType.WHERE) 6698 this = self.expression( 6699 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6700 ) 6701 self._match_r_paren() 6702 6703 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6704 # Some dialects choose to implement and some do not. 6705 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6706 6707 # There is some code above in _parse_lambda that handles 6708 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6709 6710 # The below changes handle 6711 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6712 6713 # Oracle allows both formats 6714 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6715 # and Snowflake chose to do the same for familiarity 6716 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6717 if isinstance(this, exp.AggFunc): 6718 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6719 6720 if ignore_respect and ignore_respect is not this: 6721 ignore_respect.replace(ignore_respect.this) 6722 this = self.expression(ignore_respect.__class__, this=this) 6723 6724 this = self._parse_respect_or_ignore_nulls(this) 6725 6726 # bigquery select from window x AS (partition by ...) 
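# --- Illustrative sketch, not part of the original source -----------------
# The alias=True path below is what parses named windows, e.g. a WINDOW
# clause. Assuming a recent sqlglot, with t/x/y/z as placeholder names:

import sqlglot
from sqlglot import exp

ast = sqlglot.parse_one(
    "SELECT SUM(x) OVER w FROM t WINDOW w AS (PARTITION BY y ORDER BY z)"
)
windows = list(ast.find_all(exp.Window))
# expected: two exp.Window nodes, the named one carrying alias w together
# with its PARTITION BY / ORDER BY spec
# ---------------------------------------------------------------------------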
6727 if alias: 6728 over = None 6729 self._match(TokenType.ALIAS) 6730 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6731 return this 6732 else: 6733 over = self._prev.text.upper() 6734 6735 if comments and isinstance(func, exp.Expression): 6736 func.pop_comments() 6737 6738 if not self._match(TokenType.L_PAREN): 6739 return self.expression( 6740 exp.Window, 6741 comments=comments, 6742 this=this, 6743 alias=self._parse_id_var(False), 6744 over=over, 6745 ) 6746 6747 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6748 6749 first = self._match(TokenType.FIRST) 6750 if self._match_text_seq("LAST"): 6751 first = False 6752 6753 partition, order = self._parse_partition_and_order() 6754 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6755 6756 if kind: 6757 self._match(TokenType.BETWEEN) 6758 start = self._parse_window_spec() 6759 self._match(TokenType.AND) 6760 end = self._parse_window_spec() 6761 6762 spec = self.expression( 6763 exp.WindowSpec, 6764 kind=kind, 6765 start=start["value"], 6766 start_side=start["side"], 6767 end=end["value"], 6768 end_side=end["side"], 6769 ) 6770 else: 6771 spec = None 6772 6773 self._match_r_paren() 6774 6775 window = self.expression( 6776 exp.Window, 6777 comments=comments, 6778 this=this, 6779 partition_by=partition, 6780 order=order, 6781 spec=spec, 6782 alias=window_alias, 6783 over=over, 6784 first=first, 6785 ) 6786 6787 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6788 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6789 return self._parse_window(window, alias=alias) 6790 6791 return window 6792 6793 def _parse_partition_and_order( 6794 self, 6795 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6796 return self._parse_partition_by(), self._parse_order() 6797 6798 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6799 self._match(TokenType.BETWEEN) 6800 6801 return { 6802 "value": ( 6803 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6804 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6805 or self._parse_bitwise() 6806 ), 6807 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6808 } 6809 6810 def _parse_alias( 6811 self, this: t.Optional[exp.Expression], explicit: bool = False 6812 ) -> t.Optional[exp.Expression]: 6813 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 6814 # so this section tries to parse the clause version and if it fails, it treats the token 6815 # as an identifier (alias) 6816 if self._can_parse_limit_or_offset(): 6817 return this 6818 6819 any_token = self._match(TokenType.ALIAS) 6820 comments = self._prev_comments or [] 6821 6822 if explicit and not any_token: 6823 return this 6824 6825 if self._match(TokenType.L_PAREN): 6826 aliases = self.expression( 6827 exp.Aliases, 6828 comments=comments, 6829 this=this, 6830 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6831 ) 6832 self._match_r_paren(aliases) 6833 return aliases 6834 6835 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6836 self.STRING_ALIASES and self._parse_string_as_identifier() 6837 ) 6838 6839 if alias: 6840 comments.extend(alias.pop_comments()) 6841 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6842 column = this.this 6843 6844 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6845 if not this.comments and column and 
column.comments: 6846 this.comments = column.pop_comments() 6847 6848 return this 6849 6850 def _parse_id_var( 6851 self, 6852 any_token: bool = True, 6853 tokens: t.Optional[t.Collection[TokenType]] = None, 6854 ) -> t.Optional[exp.Expression]: 6855 expression = self._parse_identifier() 6856 if not expression and ( 6857 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6858 ): 6859 quoted = self._prev.token_type == TokenType.STRING 6860 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6861 6862 return expression 6863 6864 def _parse_string(self) -> t.Optional[exp.Expression]: 6865 if self._match_set(self.STRING_PARSERS): 6866 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6867 return self._parse_placeholder() 6868 6869 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6870 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6871 6872 def _parse_number(self) -> t.Optional[exp.Expression]: 6873 if self._match_set(self.NUMERIC_PARSERS): 6874 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6875 return self._parse_placeholder() 6876 6877 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6878 if self._match(TokenType.IDENTIFIER): 6879 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6880 return self._parse_placeholder() 6881 6882 def _parse_var( 6883 self, 6884 any_token: bool = False, 6885 tokens: t.Optional[t.Collection[TokenType]] = None, 6886 upper: bool = False, 6887 ) -> t.Optional[exp.Expression]: 6888 if ( 6889 (any_token and self._advance_any()) 6890 or self._match(TokenType.VAR) 6891 or (self._match_set(tokens) if tokens else False) 6892 ): 6893 return self.expression( 6894 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6895 ) 6896 return self._parse_placeholder() 6897 6898 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6899 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6900 self._advance() 6901 return self._prev 6902 return None 6903 6904 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6905 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6906 6907 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6908 return self._parse_primary() or self._parse_var(any_token=True) 6909 6910 def _parse_null(self) -> t.Optional[exp.Expression]: 6911 if self._match_set(self.NULL_TOKENS): 6912 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6913 return self._parse_placeholder() 6914 6915 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6916 if self._match(TokenType.TRUE): 6917 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6918 if self._match(TokenType.FALSE): 6919 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6920 return self._parse_placeholder() 6921 6922 def _parse_star(self) -> t.Optional[exp.Expression]: 6923 if self._match(TokenType.STAR): 6924 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6925 return self._parse_placeholder() 6926 6927 def _parse_parameter(self) -> exp.Parameter: 6928 this = self._parse_identifier() or self._parse_primary_or_var() 6929 return self.expression(exp.Parameter, this=this) 6930 6931 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6932 if self._match_set(self.PLACEHOLDER_PARSERS): 6933 placeholder = 
self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6934 if placeholder: 6935 return placeholder 6936 self._advance(-1) 6937 return None 6938 6939 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6940 if not self._match_texts(keywords): 6941 return None 6942 if self._match(TokenType.L_PAREN, advance=False): 6943 return self._parse_wrapped_csv(self._parse_expression) 6944 6945 expression = self._parse_expression() 6946 return [expression] if expression else None 6947 6948 def _parse_csv( 6949 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6950 ) -> t.List[exp.Expression]: 6951 parse_result = parse_method() 6952 items = [parse_result] if parse_result is not None else [] 6953 6954 while self._match(sep): 6955 self._add_comments(parse_result) 6956 parse_result = parse_method() 6957 if parse_result is not None: 6958 items.append(parse_result) 6959 6960 return items 6961 6962 def _parse_tokens( 6963 self, parse_method: t.Callable, expressions: t.Dict 6964 ) -> t.Optional[exp.Expression]: 6965 this = parse_method() 6966 6967 while self._match_set(expressions): 6968 this = self.expression( 6969 expressions[self._prev.token_type], 6970 this=this, 6971 comments=self._prev_comments, 6972 expression=parse_method(), 6973 ) 6974 6975 return this 6976 6977 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6978 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6979 6980 def _parse_wrapped_csv( 6981 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6982 ) -> t.List[exp.Expression]: 6983 return self._parse_wrapped( 6984 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6985 ) 6986 6987 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6988 wrapped = self._match(TokenType.L_PAREN) 6989 if not wrapped and not optional: 6990 self.raise_error("Expecting (") 6991 parse_result = parse_method() 6992 if wrapped: 6993 self._match_r_paren() 6994 return parse_result 6995 6996 def _parse_expressions(self) -> t.List[exp.Expression]: 6997 return self._parse_csv(self._parse_expression) 6998 6999 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7000 return self._parse_select() or self._parse_set_operations( 7001 self._parse_alias(self._parse_assignment(), explicit=True) 7002 if alias 7003 else self._parse_assignment() 7004 ) 7005 7006 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7007 return self._parse_query_modifiers( 7008 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7009 ) 7010 7011 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7012 this = None 7013 if self._match_texts(self.TRANSACTION_KIND): 7014 this = self._prev.text 7015 7016 self._match_texts(("TRANSACTION", "WORK")) 7017 7018 modes = [] 7019 while True: 7020 mode = [] 7021 while self._match(TokenType.VAR): 7022 mode.append(self._prev.text) 7023 7024 if mode: 7025 modes.append(" ".join(mode)) 7026 if not self._match(TokenType.COMMA): 7027 break 7028 7029 return self.expression(exp.Transaction, this=this, modes=modes) 7030 7031 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7032 chain = None 7033 savepoint = None 7034 is_rollback = self._prev.token_type == TokenType.ROLLBACK 7035 7036 self._match_texts(("TRANSACTION", "WORK")) 7037 7038 if self._match_text_seq("TO"): 7039 self._match_text_seq("SAVEPOINT") 7040 savepoint = self._parse_id_var() 
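# At this point COMMIT/ROLLBACK [TRANSACTION | WORK] [TO [SAVEPOINT] name] has been
# consumed; the SQL-standard completion clause AND [NO] CHAIN is handled next, so
# e.g. COMMIT WORK AND NO CHAIN yields exp.Commit(chain=False), while
# ROLLBACK TO SAVEPOINT sp yields exp.Rollback with its savepoint set.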
7041 7042 if self._match(TokenType.AND): 7043 chain = not self._match_text_seq("NO") 7044 self._match_text_seq("CHAIN") 7045 7046 if is_rollback: 7047 return self.expression(exp.Rollback, savepoint=savepoint) 7048 7049 return self.expression(exp.Commit, chain=chain) 7050 7051 def _parse_refresh(self) -> exp.Refresh: 7052 self._match(TokenType.TABLE) 7053 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7054 7055 def _parse_add_column(self) -> t.Optional[exp.Expression]: 7056 if not self._match_text_seq("ADD"): 7057 return None 7058 7059 self._match(TokenType.COLUMN) 7060 exists_column = self._parse_exists(not_=True) 7061 expression = self._parse_field_def() 7062 7063 if expression: 7064 expression.set("exists", exists_column) 7065 7066 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7067 if self._match_texts(("FIRST", "AFTER")): 7068 position = self._prev.text 7069 column_position = self.expression( 7070 exp.ColumnPosition, this=self._parse_column(), position=position 7071 ) 7072 expression.set("position", column_position) 7073 7074 return expression 7075 7076 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7077 drop = self._match(TokenType.DROP) and self._parse_drop() 7078 if drop and not isinstance(drop, exp.Command): 7079 drop.set("kind", drop.args.get("kind", "COLUMN")) 7080 return drop 7081 7082 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7083 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7084 return self.expression( 7085 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7086 ) 7087 7088 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7089 index = self._index - 1 7090 7091 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7092 return self._parse_csv( 7093 lambda: self.expression( 7094 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7095 ) 7096 ) 7097 7098 self._retreat(index) 7099 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 7100 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 7101 7102 if self._match_text_seq("ADD", "COLUMNS"): 7103 schema = self._parse_schema() 7104 if schema: 7105 return [schema] 7106 return [] 7107 7108 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 7109 7110 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7111 if self._match_texts(self.ALTER_ALTER_PARSERS): 7112 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7113 7114 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7115 # keyword after ALTER we default to parsing this statement 7116 self._match(TokenType.COLUMN) 7117 column = self._parse_field(any_token=True) 7118 7119 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7120 return self.expression(exp.AlterColumn, this=column, drop=True) 7121 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7122 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7123 if self._match(TokenType.COMMENT): 7124 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7125 if self._match_text_seq("DROP", "NOT", "NULL"): 7126 return self.expression( 7127 exp.AlterColumn, 7128 this=column, 7129 drop=True, 7130 allow_null=True, 7131 ) 7132 if self._match_text_seq("SET", "NOT", "NULL"): 7133 return self.expression( 7134 
exp.AlterColumn, 7135 this=column, 7136 allow_null=False, 7137 ) 7138 7139 if self._match_text_seq("SET", "VISIBLE"): 7140 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7141 if self._match_text_seq("SET", "INVISIBLE"): 7142 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7143 7144 self._match_text_seq("SET", "DATA") 7145 self._match_text_seq("TYPE") 7146 return self.expression( 7147 exp.AlterColumn, 7148 this=column, 7149 dtype=self._parse_types(), 7150 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7151 using=self._match(TokenType.USING) and self._parse_assignment(), 7152 ) 7153 7154 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7155 if self._match_texts(("ALL", "EVEN", "AUTO")): 7156 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7157 7158 self._match_text_seq("KEY", "DISTKEY") 7159 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7160 7161 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7162 if compound: 7163 self._match_text_seq("SORTKEY") 7164 7165 if self._match(TokenType.L_PAREN, advance=False): 7166 return self.expression( 7167 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7168 ) 7169 7170 self._match_texts(("AUTO", "NONE")) 7171 return self.expression( 7172 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7173 ) 7174 7175 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7176 index = self._index - 1 7177 7178 partition_exists = self._parse_exists() 7179 if self._match(TokenType.PARTITION, advance=False): 7180 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7181 7182 self._retreat(index) 7183 return self._parse_csv(self._parse_drop_column) 7184 7185 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7186 if self._match(TokenType.COLUMN): 7187 exists = self._parse_exists() 7188 old_column = self._parse_column() 7189 to = self._match_text_seq("TO") 7190 new_column = self._parse_column() 7191 7192 if old_column is None or to is None or new_column is None: 7193 return None 7194 7195 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7196 7197 self._match_text_seq("TO") 7198 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7199 7200 def _parse_alter_table_set(self) -> exp.AlterSet: 7201 alter_set = self.expression(exp.AlterSet) 7202 7203 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7204 "TABLE", "PROPERTIES" 7205 ): 7206 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7207 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7208 alter_set.set("expressions", [self._parse_assignment()]) 7209 elif self._match_texts(("LOGGED", "UNLOGGED")): 7210 alter_set.set("option", exp.var(self._prev.text.upper())) 7211 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7212 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7213 elif self._match_text_seq("LOCATION"): 7214 alter_set.set("location", self._parse_field()) 7215 elif self._match_text_seq("ACCESS", "METHOD"): 7216 alter_set.set("access_method", self._parse_field()) 7217 elif self._match_text_seq("TABLESPACE"): 7218 alter_set.set("tablespace", self._parse_field()) 7219 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7220 
alter_set.set("file_format", [self._parse_field()]) 7221 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7222 alter_set.set("file_format", self._parse_wrapped_options()) 7223 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7224 alter_set.set("copy_options", self._parse_wrapped_options()) 7225 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7226 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7227 else: 7228 if self._match_text_seq("SERDE"): 7229 alter_set.set("serde", self._parse_field()) 7230 7231 alter_set.set("expressions", [self._parse_properties()]) 7232 7233 return alter_set 7234 7235 def _parse_alter(self) -> exp.Alter | exp.Command: 7236 start = self._prev 7237 7238 alter_token = self._match_set(self.ALTERABLES) and self._prev 7239 if not alter_token: 7240 return self._parse_as_command(start) 7241 7242 exists = self._parse_exists() 7243 only = self._match_text_seq("ONLY") 7244 this = self._parse_table(schema=True) 7245 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7246 7247 if self._next: 7248 self._advance() 7249 7250 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7251 if parser: 7252 actions = ensure_list(parser(self)) 7253 not_valid = self._match_text_seq("NOT", "VALID") 7254 options = self._parse_csv(self._parse_property) 7255 7256 if not self._curr and actions: 7257 return self.expression( 7258 exp.Alter, 7259 this=this, 7260 kind=alter_token.text.upper(), 7261 exists=exists, 7262 actions=actions, 7263 only=only, 7264 options=options, 7265 cluster=cluster, 7266 not_valid=not_valid, 7267 ) 7268 7269 return self._parse_as_command(start) 7270 7271 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7272 start = self._prev 7273 # https://duckdb.org/docs/sql/statements/analyze 7274 if not self._curr: 7275 return self.expression(exp.Analyze) 7276 7277 options = [] 7278 while self._match_texts(self.ANALYZE_STYLES): 7279 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7280 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7281 else: 7282 options.append(self._prev.text.upper()) 7283 7284 this: t.Optional[exp.Expression] = None 7285 inner_expression: t.Optional[exp.Expression] = None 7286 7287 kind = self._curr and self._curr.text.upper() 7288 7289 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7290 this = self._parse_table_parts() 7291 elif self._match_text_seq("TABLES"): 7292 if self._match_set((TokenType.FROM, TokenType.IN)): 7293 kind = f"{kind} {self._prev.text.upper()}" 7294 this = self._parse_table(schema=True, is_db_reference=True) 7295 elif self._match_text_seq("DATABASE"): 7296 this = self._parse_table(schema=True, is_db_reference=True) 7297 elif self._match_text_seq("CLUSTER"): 7298 this = self._parse_table() 7299 # Try matching inner expr keywords before fallback to parse table. 
7300 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7301 kind = None 7302 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7303 else: 7304 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7305 kind = None 7306 this = self._parse_table_parts() 7307 7308 partition = self._try_parse(self._parse_partition) 7309 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7310 return self._parse_as_command(start) 7311 7312 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7313 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7314 "WITH", "ASYNC", "MODE" 7315 ): 7316 mode = f"WITH {self._tokens[self._index-2].text.upper()} MODE" 7317 else: 7318 mode = None 7319 7320 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7321 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7322 7323 properties = self._parse_properties() 7324 return self.expression( 7325 exp.Analyze, 7326 kind=kind, 7327 this=this, 7328 mode=mode, 7329 partition=partition, 7330 properties=properties, 7331 expression=inner_expression, 7332 options=options, 7333 ) 7334 7335 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7336 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7337 this = None 7338 kind = self._prev.text.upper() 7339 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7340 expressions = [] 7341 7342 if not self._match_text_seq("STATISTICS"): 7343 self.raise_error("Expecting token STATISTICS") 7344 7345 if self._match_text_seq("NOSCAN"): 7346 this = "NOSCAN" 7347 elif self._match(TokenType.FOR): 7348 if self._match_text_seq("ALL", "COLUMNS"): 7349 this = "FOR ALL COLUMNS" 7350 if self._match_texts("COLUMNS"): 7351 this = "FOR COLUMNS" 7352 expressions = self._parse_csv(self._parse_column_reference) 7353 elif self._match_text_seq("SAMPLE"): 7354 sample = self._parse_number() 7355 expressions = [ 7356 self.expression( 7357 exp.AnalyzeSample, 7358 sample=sample, 7359 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7360 ) 7361 ] 7362 7363 return self.expression( 7364 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7365 ) 7366 7367 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7368 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7369 kind = None 7370 this = None 7371 expression: t.Optional[exp.Expression] = None 7372 if self._match_text_seq("REF", "UPDATE"): 7373 kind = "REF" 7374 this = "UPDATE" 7375 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7376 this = "UPDATE SET DANGLING TO NULL" 7377 elif self._match_text_seq("STRUCTURE"): 7378 kind = "STRUCTURE" 7379 if self._match_text_seq("CASCADE", "FAST"): 7380 this = "CASCADE FAST" 7381 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7382 ("ONLINE", "OFFLINE") 7383 ): 7384 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7385 expression = self._parse_into() 7386 7387 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7388 7389 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7390 this = self._prev.text.upper() 7391 if self._match_text_seq("COLUMNS"): 7392 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7393 return None 7394 7395 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7396 kind = 
self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7397 if self._match_text_seq("STATISTICS"): 7398 return self.expression(exp.AnalyzeDelete, kind=kind) 7399 return None 7400 7401 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7402 if self._match_text_seq("CHAINED", "ROWS"): 7403 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7404 return None 7405 7406 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7407 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7408 this = self._prev.text.upper() 7409 expression: t.Optional[exp.Expression] = None 7410 expressions = [] 7411 update_options = None 7412 7413 if self._match_text_seq("HISTOGRAM", "ON"): 7414 expressions = self._parse_csv(self._parse_column_reference) 7415 with_expressions = [] 7416 while self._match(TokenType.WITH): 7417 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7418 if self._match_texts(("SYNC", "ASYNC")): 7419 if self._match_text_seq("MODE", advance=False): 7420 with_expressions.append(f"{self._prev.text.upper()} MODE") 7421 self._advance() 7422 else: 7423 buckets = self._parse_number() 7424 if self._match_text_seq("BUCKETS"): 7425 with_expressions.append(f"{buckets} BUCKETS") 7426 if with_expressions: 7427 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7428 7429 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7430 TokenType.UPDATE, advance=False 7431 ): 7432 update_options = self._prev.text.upper() 7433 self._advance() 7434 elif self._match_text_seq("USING", "DATA"): 7435 expression = self.expression(exp.UsingData, this=self._parse_string()) 7436 7437 return self.expression( 7438 exp.AnalyzeHistogram, 7439 this=this, 7440 expressions=expressions, 7441 expression=expression, 7442 update_options=update_options, 7443 ) 7444 7445 def _parse_merge(self) -> exp.Merge: 7446 self._match(TokenType.INTO) 7447 target = self._parse_table() 7448 7449 if target and self._match(TokenType.ALIAS, advance=False): 7450 target.set("alias", self._parse_table_alias()) 7451 7452 self._match(TokenType.USING) 7453 using = self._parse_table() 7454 7455 self._match(TokenType.ON) 7456 on = self._parse_assignment() 7457 7458 return self.expression( 7459 exp.Merge, 7460 this=target, 7461 using=using, 7462 on=on, 7463 whens=self._parse_when_matched(), 7464 returning=self._parse_returning(), 7465 ) 7466 7467 def _parse_when_matched(self) -> exp.Whens: 7468 whens = [] 7469 7470 while self._match(TokenType.WHEN): 7471 matched = not self._match(TokenType.NOT) 7472 self._match_text_seq("MATCHED") 7473 source = ( 7474 False 7475 if self._match_text_seq("BY", "TARGET") 7476 else self._match_text_seq("BY", "SOURCE") 7477 ) 7478 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7479 7480 self._match(TokenType.THEN) 7481 7482 if self._match(TokenType.INSERT): 7483 this = self._parse_star() 7484 if this: 7485 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7486 else: 7487 then = self.expression( 7488 exp.Insert, 7489 this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(), 7490 expression=self._match_text_seq("VALUES") and self._parse_value(), 7491 ) 7492 elif self._match(TokenType.UPDATE): 7493 expressions = self._parse_star() 7494 if expressions: 7495 then = self.expression(exp.Update, expressions=expressions) 7496 else: 7497 then = self.expression( 7498 exp.Update, 7499 expressions=self._match(TokenType.SET) 7500 and 
self._parse_csv(self._parse_equality), 7501 ) 7502 elif self._match(TokenType.DELETE): 7503 then = self.expression(exp.Var, this=self._prev.text) 7504 else: 7505 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7506 7507 whens.append( 7508 self.expression( 7509 exp.When, 7510 matched=matched, 7511 source=source, 7512 condition=condition, 7513 then=then, 7514 ) 7515 ) 7516 return self.expression(exp.Whens, expressions=whens) 7517 7518 def _parse_show(self) -> t.Optional[exp.Expression]: 7519 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7520 if parser: 7521 return parser(self) 7522 return self._parse_as_command(self._prev) 7523 7524 def _parse_set_item_assignment( 7525 self, kind: t.Optional[str] = None 7526 ) -> t.Optional[exp.Expression]: 7527 index = self._index 7528 7529 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7530 return self._parse_set_transaction(global_=kind == "GLOBAL") 7531 7532 left = self._parse_primary() or self._parse_column() 7533 assignment_delimiter = self._match_texts(("=", "TO")) 7534 7535 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7536 self._retreat(index) 7537 return None 7538 7539 right = self._parse_statement() or self._parse_id_var() 7540 if isinstance(right, (exp.Column, exp.Identifier)): 7541 right = exp.var(right.name) 7542 7543 this = self.expression(exp.EQ, this=left, expression=right) 7544 return self.expression(exp.SetItem, this=this, kind=kind) 7545 7546 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7547 self._match_text_seq("TRANSACTION") 7548 characteristics = self._parse_csv( 7549 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7550 ) 7551 return self.expression( 7552 exp.SetItem, 7553 expressions=characteristics, 7554 kind="TRANSACTION", 7555 **{"global": global_}, # type: ignore 7556 ) 7557 7558 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7559 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7560 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7561 7562 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7563 index = self._index 7564 set_ = self.expression( 7565 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7566 ) 7567 7568 if self._curr: 7569 self._retreat(index) 7570 return self._parse_as_command(self._prev) 7571 7572 return set_ 7573 7574 def _parse_var_from_options( 7575 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7576 ) -> t.Optional[exp.Var]: 7577 start = self._curr 7578 if not start: 7579 return None 7580 7581 option = start.text.upper() 7582 continuations = options.get(option) 7583 7584 index = self._index 7585 self._advance() 7586 for keywords in continuations or []: 7587 if isinstance(keywords, str): 7588 keywords = (keywords,) 7589 7590 if self._match_text_seq(*keywords): 7591 option = f"{option} {' '.join(keywords)}" 7592 break 7593 else: 7594 if continuations or continuations is None: 7595 if raise_unmatched: 7596 self.raise_error(f"Unknown option {option}") 7597 7598 self._retreat(index) 7599 return None 7600 7601 return exp.var(option) 7602 7603 def _parse_as_command(self, start: Token) -> exp.Command: 7604 while self._curr: 7605 self._advance() 7606 text = self._find_sql(start, self._prev) 7607 size = len(start.text) 7608 self._warn_unsupported() 7609 return exp.Command(this=text[:size], expression=text[size:]) 7610 7611 def _parse_dict_property(self, 
this: str) -> exp.DictProperty: 7612 settings = [] 7613 7614 self._match_l_paren() 7615 kind = self._parse_id_var() 7616 7617 if self._match(TokenType.L_PAREN): 7618 while True: 7619 key = self._parse_id_var() 7620 value = self._parse_primary() 7621 if not key and value is None: 7622 break 7623 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7624 self._match(TokenType.R_PAREN) 7625 7626 self._match_r_paren() 7627 7628 return self.expression( 7629 exp.DictProperty, 7630 this=this, 7631 kind=kind.this if kind else None, 7632 settings=settings, 7633 ) 7634 7635 def _parse_dict_range(self, this: str) -> exp.DictRange: 7636 self._match_l_paren() 7637 has_min = self._match_text_seq("MIN") 7638 if has_min: 7639 min = self._parse_var() or self._parse_primary() 7640 self._match_text_seq("MAX") 7641 max = self._parse_var() or self._parse_primary() 7642 else: 7643 max = self._parse_var() or self._parse_primary() 7644 min = exp.Literal.number(0) 7645 self._match_r_paren() 7646 return self.expression(exp.DictRange, this=this, min=min, max=max) 7647 7648 def _parse_comprehension( 7649 self, this: t.Optional[exp.Expression] 7650 ) -> t.Optional[exp.Comprehension]: 7651 index = self._index 7652 expression = self._parse_column() 7653 if not self._match(TokenType.IN): 7654 self._retreat(index - 1) 7655 return None 7656 iterator = self._parse_column() 7657 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7658 return self.expression( 7659 exp.Comprehension, 7660 this=this, 7661 expression=expression, 7662 iterator=iterator, 7663 condition=condition, 7664 ) 7665 7666 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7667 if self._match(TokenType.HEREDOC_STRING): 7668 return self.expression(exp.Heredoc, this=self._prev.text) 7669 7670 if not self._match_text_seq("$"): 7671 return None 7672 7673 tags = ["$"] 7674 tag_text = None 7675 7676 if self._is_connected(): 7677 self._advance() 7678 tags.append(self._prev.text.upper()) 7679 else: 7680 self.raise_error("No closing $ found") 7681 7682 if tags[-1] != "$": 7683 if self._is_connected() and self._match_text_seq("$"): 7684 tag_text = tags[-1] 7685 tags.append("$") 7686 else: 7687 self.raise_error("No closing $ found") 7688 7689 heredoc_start = self._curr 7690 7691 while self._curr: 7692 if self._match_text_seq(*tags, advance=False): 7693 this = self._find_sql(heredoc_start, self._prev) 7694 self._advance(len(tags)) 7695 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7696 7697 self._advance() 7698 7699 self.raise_error(f"No closing {''.join(tags)} found") 7700 return None 7701 7702 def _find_parser( 7703 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7704 ) -> t.Optional[t.Callable]: 7705 if not self._curr: 7706 return None 7707 7708 index = self._index 7709 this = [] 7710 while True: 7711 # The current token might be multiple words 7712 curr = self._curr.text.upper() 7713 key = curr.split(" ") 7714 this.append(curr) 7715 7716 self._advance() 7717 result, trie = in_trie(trie, key) 7718 if result == TrieResult.FAILED: 7719 break 7720 7721 if result == TrieResult.EXISTS: 7722 subparser = parsers[" ".join(this)] 7723 return subparser 7724 7725 self._retreat(index) 7726 return None 7727 7728 def _match(self, token_type, advance=True, expression=None): 7729 if not self._curr: 7730 return None 7731 7732 if self._curr.token_type == token_type: 7733 if advance: 7734 self._advance() 7735 self._add_comments(expression) 7736 return True 7737 7738 return None 7739 7740 def _match_set(self, types, 
advance=True): 7741 if not self._curr: 7742 return None 7743 7744 if self._curr.token_type in types: 7745 if advance: 7746 self._advance() 7747 return True 7748 7749 return None 7750 7751 def _match_pair(self, token_type_a, token_type_b, advance=True): 7752 if not self._curr or not self._next: 7753 return None 7754 7755 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7756 if advance: 7757 self._advance(2) 7758 return True 7759 7760 return None 7761 7762 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7763 if not self._match(TokenType.L_PAREN, expression=expression): 7764 self.raise_error("Expecting (") 7765 7766 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7767 if not self._match(TokenType.R_PAREN, expression=expression): 7768 self.raise_error("Expecting )") 7769 7770 def _match_texts(self, texts, advance=True): 7771 if ( 7772 self._curr 7773 and self._curr.token_type != TokenType.STRING 7774 and self._curr.text.upper() in texts 7775 ): 7776 if advance: 7777 self._advance() 7778 return True 7779 return None 7780 7781 def _match_text_seq(self, *texts, advance=True): 7782 index = self._index 7783 for text in texts: 7784 if ( 7785 self._curr 7786 and self._curr.token_type != TokenType.STRING 7787 and self._curr.text.upper() == text 7788 ): 7789 self._advance() 7790 else: 7791 self._retreat(index) 7792 return None 7793 7794 if not advance: 7795 self._retreat(index) 7796 7797 return True 7798 7799 def _replace_lambda( 7800 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7801 ) -> t.Optional[exp.Expression]: 7802 if not node: 7803 return node 7804 7805 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7806 7807 for column in node.find_all(exp.Column): 7808 typ = lambda_types.get(column.parts[0].name) 7809 if typ is not None: 7810 dot_or_id = column.to_dot() if column.table else column.this 7811 7812 if typ: 7813 dot_or_id = self.expression( 7814 exp.Cast, 7815 this=dot_or_id, 7816 to=typ, 7817 ) 7818 7819 parent = column.parent 7820 7821 while isinstance(parent, exp.Dot): 7822 if not isinstance(parent.parent, exp.Dot): 7823 parent.replace(dot_or_id) 7824 break 7825 parent = parent.parent 7826 else: 7827 if column is node: 7828 node = dot_or_id 7829 else: 7830 column.replace(dot_or_id) 7831 return node 7832 7833 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7834 start = self._prev 7835 7836 # Not to be confused with TRUNCATE(number, decimals) function call 7837 if self._match(TokenType.L_PAREN): 7838 self._retreat(self._index - 2) 7839 return self._parse_function() 7840 7841 # Clickhouse supports TRUNCATE DATABASE as well 7842 is_database = self._match(TokenType.DATABASE) 7843 7844 self._match(TokenType.TABLE) 7845 7846 exists = self._parse_exists(not_=False) 7847 7848 expressions = self._parse_csv( 7849 lambda: self._parse_table(schema=True, is_db_reference=is_database) 7850 ) 7851 7852 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7853 7854 if self._match_text_seq("RESTART", "IDENTITY"): 7855 identity = "RESTART" 7856 elif self._match_text_seq("CONTINUE", "IDENTITY"): 7857 identity = "CONTINUE" 7858 else: 7859 identity = None 7860 7861 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7862 option = self._prev.text 7863 else: 7864 option = None 7865 7866 partition = self._parse_partition() 7867 7868 # Fallback case 7869 if self._curr: 7870 return 
self._parse_as_command(start) 7871 7872 return self.expression( 7873 exp.TruncateTable, 7874 expressions=expressions, 7875 is_database=is_database, 7876 exists=exists, 7877 cluster=cluster, 7878 identity=identity, 7879 option=option, 7880 partition=partition, 7881 ) 7882 7883 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7884 this = self._parse_ordered(self._parse_opclass) 7885 7886 if not self._match(TokenType.WITH): 7887 return this 7888 7889 op = self._parse_var(any_token=True) 7890 7891 return self.expression(exp.WithOperator, this=this, op=op) 7892 7893 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 7894 self._match(TokenType.EQ) 7895 self._match(TokenType.L_PAREN) 7896 7897 opts: t.List[t.Optional[exp.Expression]] = [] 7898 while self._curr and not self._match(TokenType.R_PAREN): 7899 if self._match_text_seq("FORMAT_NAME", "="): 7900 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 7901 prop = self.expression( 7902 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_table_parts() 7903 ) 7904 opts.append(prop) 7905 else: 7906 opts.append(self._parse_property()) 7907 7908 self._match(TokenType.COMMA) 7909 7910 return opts 7911 7912 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 7913 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 7914 7915 options = [] 7916 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 7917 option = self._parse_var(any_token=True) 7918 prev = self._prev.text.upper() 7919 7920 # Different dialects might separate options and values by white space, "=" and "AS" 7921 self._match(TokenType.EQ) 7922 self._match(TokenType.ALIAS) 7923 7924 param = self.expression(exp.CopyParameter, this=option) 7925 7926 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 7927 TokenType.L_PAREN, advance=False 7928 ): 7929 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 7930 param.set("expressions", self._parse_wrapped_options()) 7931 elif prev == "FILE_FORMAT": 7932 # T-SQL's external file format case 7933 param.set("expression", self._parse_field()) 7934 else: 7935 param.set("expression", self._parse_unquoted_field()) 7936 7937 options.append(param) 7938 self._match(sep) 7939 7940 return options 7941 7942 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 7943 expr = self.expression(exp.Credentials) 7944 7945 if self._match_text_seq("STORAGE_INTEGRATION", "="): 7946 expr.set("storage", self._parse_field()) 7947 if self._match_text_seq("CREDENTIALS"): 7948 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 7949 creds = ( 7950 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 7951 ) 7952 expr.set("credentials", creds) 7953 if self._match_text_seq("ENCRYPTION"): 7954 expr.set("encryption", self._parse_wrapped_options()) 7955 if self._match_text_seq("IAM_ROLE"): 7956 expr.set("iam_role", self._parse_field()) 7957 if self._match_text_seq("REGION"): 7958 expr.set("region", self._parse_field()) 7959 7960 return expr 7961 7962 def _parse_file_location(self) -> t.Optional[exp.Expression]: 7963 return self._parse_field() 7964 7965 def _parse_copy(self) -> exp.Copy | exp.Command: 7966 start = self._prev 7967 7968 self._match(TokenType.INTO) 7969 7970 this = ( 7971 self._parse_select(nested=True, parse_subquery_alias=False) 7972 if self._match(TokenType.L_PAREN, advance=False) 7973 else self._parse_table(schema=True) 7974 ) 7975 7976 kind = self._match(TokenType.FROM) or not 
self._match_text_seq("TO") 7977 7978 files = self._parse_csv(self._parse_file_location) 7979 credentials = self._parse_credentials() 7980 7981 self._match_text_seq("WITH") 7982 7983 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 7984 7985 # Fallback case 7986 if self._curr: 7987 return self._parse_as_command(start) 7988 7989 return self.expression( 7990 exp.Copy, 7991 this=this, 7992 kind=kind, 7993 credentials=credentials, 7994 files=files, 7995 params=params, 7996 ) 7997 7998 def _parse_normalize(self) -> exp.Normalize: 7999 return self.expression( 8000 exp.Normalize, 8001 this=self._parse_bitwise(), 8002 form=self._match(TokenType.COMMA) and self._parse_var(), 8003 ) 8004 8005 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8006 args = self._parse_csv(lambda: self._parse_lambda()) 8007 8008 this = seq_get(args, 0) 8009 decimals = seq_get(args, 1) 8010 8011 return expr_type( 8012 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8013 ) 8014 8015 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8016 if self._match_text_seq("COLUMNS", "(", advance=False): 8017 this = self._parse_function() 8018 if isinstance(this, exp.Columns): 8019 this.set("unpack", True) 8020 return this 8021 8022 return self.expression( 8023 exp.Star, 8024 **{ # type: ignore 8025 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8026 "replace": self._parse_star_op("REPLACE"), 8027 "rename": self._parse_star_op("RENAME"), 8028 }, 8029 ) 8030 8031 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8032 privilege_parts = [] 8033 8034 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8035 # (end of privilege list) or L_PAREN (start of column list) are met 8036 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8037 privilege_parts.append(self._curr.text.upper()) 8038 self._advance() 8039 8040 this = exp.var(" ".join(privilege_parts)) 8041 expressions = ( 8042 self._parse_wrapped_csv(self._parse_column) 8043 if self._match(TokenType.L_PAREN, advance=False) 8044 else None 8045 ) 8046 8047 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8048 8049 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8050 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8051 principal = self._parse_id_var() 8052 8053 if not principal: 8054 return None 8055 8056 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8057 8058 def _parse_grant(self) -> exp.Grant | exp.Command: 8059 start = self._prev 8060 8061 privileges = self._parse_csv(self._parse_grant_privilege) 8062 8063 self._match(TokenType.ON) 8064 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8065 8066 # Attempt to parse the securable e.g. 
MySQL allows names 8067 # such as "foo.*", "*.*" which are not easily parseable yet 8068 securable = self._try_parse(self._parse_table_parts) 8069 8070 if not securable or not self._match_text_seq("TO"): 8071 return self._parse_as_command(start) 8072 8073 principals = self._parse_csv(self._parse_grant_principal) 8074 8075 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8076 8077 if self._curr: 8078 return self._parse_as_command(start) 8079 8080 return self.expression( 8081 exp.Grant, 8082 privileges=privileges, 8083 kind=kind, 8084 securable=securable, 8085 principals=principals, 8086 grant_option=grant_option, 8087 ) 8088 8089 def _parse_overlay(self) -> exp.Overlay: 8090 return self.expression( 8091 exp.Overlay, 8092 **{ # type: ignore 8093 "this": self._parse_bitwise(), 8094 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8095 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8096 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8097 }, 8098 )
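A minimal end-to-end sketch of how these private entry points surface through the public API (assuming a standard sqlglot install; the AST reprs in the comments are abbreviated):

import sqlglot
from sqlglot import exp

# MERGE: _parse_merge and _parse_when_matched assemble an exp.Merge whose
# "whens" child is the exp.Whens node built above
merge = sqlglot.parse_one(
    "MERGE INTO t USING s ON t.id = s.id "
    "WHEN MATCHED THEN UPDATE SET t.v = s.v "
    "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)"
)
print(len(merge.args["whens"].expressions))  # 2, one per WHEN branch

# Star modifiers: _parse_star_ops records EXCEPT/REPLACE/RENAME on exp.Star
star = sqlglot.parse_one("SELECT * EXCEPT (a, b) FROM t", read="bigquery").find(exp.Star)
print(star.args["except"])  # [Column(a), Column(b)]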
def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)
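For instance (a sketch; the Postgres target is illustrative), the exp.Paren wrapping above is what keeps operator precedence intact once MOD is rendered as the % operator:

import sqlglot

# MOD(a + 1, 7) -> (a + 1) % 7; without the wrap this would emit a + 1 % 7,
# which binds differently
print(sqlglot.transpile("SELECT MOD(a + 1, 7)", write="postgres")[0])
# SELECT (a + 1) % 7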
def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp
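A direct-call sketch (the bare base Dialect() is purely illustrative; real callers pass the active dialect). Since the base dialect's HAS_DISTINCT_ARRAY_CONSTRUCTORS flag is falsy, the bracket kind is simply discarded:

from sqlglot import exp
from sqlglot.dialects.dialect import Dialect
from sqlglot.parser import build_array_constructor
from sqlglot.tokens import TokenType

# bracket_notation is only recorded for dialects that distinguish the
# ARRAY(...) constructor from the [...] bracket form
arr = build_array_constructor(exp.Array, [exp.Literal.number(1)], TokenType.L_BRACKET, Dialect())
print(arr.sql())  # ARRAY(1)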
def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)
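And a sketch of the two-argument path with hypothetical values, showing the default source timezone being injected as a string literal:

from sqlglot import exp
from sqlglot.parser import build_convert_timezone

node = build_convert_timezone(
    [exp.Literal.string("America/New_York"), exp.column("ts")],
    default_source_tz="UTC",
)
# source_tz is the injected 'UTC' literal; target_tz and timestamp come from args
print(node.args["source_tz"].this, node.args["target_tz"].this)  # UTC America/New_York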
175class Parser(metaclass=_Parser): 176 """ 177 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 178 179 Args: 180 error_level: The desired error level. 181 Default: ErrorLevel.IMMEDIATE 182 error_message_context: The amount of context to capture from a query string when displaying 183 the error message (in number of characters). 184 Default: 100 185 max_errors: Maximum number of error messages to include in a raised ParseError. 186 This is only relevant if error_level is ErrorLevel.RAISE. 187 Default: 3 188 """ 189 190 FUNCTIONS: t.Dict[str, t.Callable] = { 191 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 192 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 193 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 194 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 195 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 196 ), 197 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 198 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 199 ), 200 "CHAR": lambda args: exp.Chr(expressions=args), 201 "CHR": lambda args: exp.Chr(expressions=args), 202 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 203 "CONCAT": lambda args, dialect: exp.Concat( 204 expressions=args, 205 safe=not dialect.STRICT_STRING_CONCAT, 206 coalesce=dialect.CONCAT_COALESCE, 207 ), 208 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 209 expressions=args, 210 safe=not dialect.STRICT_STRING_CONCAT, 211 coalesce=dialect.CONCAT_COALESCE, 212 ), 213 "CONVERT_TIMEZONE": build_convert_timezone, 214 "DATE_TO_DATE_STR": lambda args: exp.Cast( 215 this=seq_get(args, 0), 216 to=exp.DataType(this=exp.DataType.Type.TEXT), 217 ), 218 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 219 start=seq_get(args, 0), 220 end=seq_get(args, 1), 221 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")), 222 ), 223 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 224 "HEX": build_hex, 225 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 226 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 227 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 228 "LIKE": build_like, 229 "LOG": build_logarithm, 230 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 231 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 232 "LOWER": build_lower, 233 "LPAD": lambda args: build_pad(args), 234 "LEFTPAD": lambda args: build_pad(args), 235 "LTRIM": lambda args: build_trim(args), 236 "MOD": build_mod, 237 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 238 "RPAD": lambda args: build_pad(args, is_left=False), 239 "RTRIM": lambda args: build_trim(args, is_left=False), 240 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 241 if len(args) != 2 242 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 243 "STRPOS": exp.StrPosition.from_arg_list, 244 "CHARINDEX": lambda args: build_locate_strposition(args), 245 "INSTR": exp.StrPosition.from_arg_list, 246 "LOCATE": lambda args: build_locate_strposition(args), 247 "TIME_TO_TIME_STR": lambda args: exp.Cast( 248 this=seq_get(args, 0), 249 to=exp.DataType(this=exp.DataType.Type.TEXT), 250 ), 251 "TO_HEX": build_hex, 252 "TS_OR_DS_TO_DATE_STR": 
lambda args: exp.Substring( 253 this=exp.Cast( 254 this=seq_get(args, 0), 255 to=exp.DataType(this=exp.DataType.Type.TEXT), 256 ), 257 start=exp.Literal.number(1), 258 length=exp.Literal.number(10), 259 ), 260 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 261 "UPPER": build_upper, 262 "VAR_MAP": build_var_map, 263 } 264 265 NO_PAREN_FUNCTIONS = { 266 TokenType.CURRENT_DATE: exp.CurrentDate, 267 TokenType.CURRENT_DATETIME: exp.CurrentDate, 268 TokenType.CURRENT_TIME: exp.CurrentTime, 269 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 270 TokenType.CURRENT_USER: exp.CurrentUser, 271 } 272 273 STRUCT_TYPE_TOKENS = { 274 TokenType.NESTED, 275 TokenType.OBJECT, 276 TokenType.STRUCT, 277 TokenType.UNION, 278 } 279 280 NESTED_TYPE_TOKENS = { 281 TokenType.ARRAY, 282 TokenType.LIST, 283 TokenType.LOWCARDINALITY, 284 TokenType.MAP, 285 TokenType.NULLABLE, 286 TokenType.RANGE, 287 *STRUCT_TYPE_TOKENS, 288 } 289 290 ENUM_TYPE_TOKENS = { 291 TokenType.DYNAMIC, 292 TokenType.ENUM, 293 TokenType.ENUM8, 294 TokenType.ENUM16, 295 } 296 297 AGGREGATE_TYPE_TOKENS = { 298 TokenType.AGGREGATEFUNCTION, 299 TokenType.SIMPLEAGGREGATEFUNCTION, 300 } 301 302 TYPE_TOKENS = { 303 TokenType.BIT, 304 TokenType.BOOLEAN, 305 TokenType.TINYINT, 306 TokenType.UTINYINT, 307 TokenType.SMALLINT, 308 TokenType.USMALLINT, 309 TokenType.INT, 310 TokenType.UINT, 311 TokenType.BIGINT, 312 TokenType.UBIGINT, 313 TokenType.INT128, 314 TokenType.UINT128, 315 TokenType.INT256, 316 TokenType.UINT256, 317 TokenType.MEDIUMINT, 318 TokenType.UMEDIUMINT, 319 TokenType.FIXEDSTRING, 320 TokenType.FLOAT, 321 TokenType.DOUBLE, 322 TokenType.UDOUBLE, 323 TokenType.CHAR, 324 TokenType.NCHAR, 325 TokenType.VARCHAR, 326 TokenType.NVARCHAR, 327 TokenType.BPCHAR, 328 TokenType.TEXT, 329 TokenType.MEDIUMTEXT, 330 TokenType.LONGTEXT, 331 TokenType.BLOB, 332 TokenType.MEDIUMBLOB, 333 TokenType.LONGBLOB, 334 TokenType.BINARY, 335 TokenType.VARBINARY, 336 TokenType.JSON, 337 TokenType.JSONB, 338 TokenType.INTERVAL, 339 TokenType.TINYBLOB, 340 TokenType.TINYTEXT, 341 TokenType.TIME, 342 TokenType.TIMETZ, 343 TokenType.TIMESTAMP, 344 TokenType.TIMESTAMP_S, 345 TokenType.TIMESTAMP_MS, 346 TokenType.TIMESTAMP_NS, 347 TokenType.TIMESTAMPTZ, 348 TokenType.TIMESTAMPLTZ, 349 TokenType.TIMESTAMPNTZ, 350 TokenType.DATETIME, 351 TokenType.DATETIME2, 352 TokenType.DATETIME64, 353 TokenType.SMALLDATETIME, 354 TokenType.DATE, 355 TokenType.DATE32, 356 TokenType.INT4RANGE, 357 TokenType.INT4MULTIRANGE, 358 TokenType.INT8RANGE, 359 TokenType.INT8MULTIRANGE, 360 TokenType.NUMRANGE, 361 TokenType.NUMMULTIRANGE, 362 TokenType.TSRANGE, 363 TokenType.TSMULTIRANGE, 364 TokenType.TSTZRANGE, 365 TokenType.TSTZMULTIRANGE, 366 TokenType.DATERANGE, 367 TokenType.DATEMULTIRANGE, 368 TokenType.DECIMAL, 369 TokenType.DECIMAL32, 370 TokenType.DECIMAL64, 371 TokenType.DECIMAL128, 372 TokenType.DECIMAL256, 373 TokenType.UDECIMAL, 374 TokenType.BIGDECIMAL, 375 TokenType.UUID, 376 TokenType.GEOGRAPHY, 377 TokenType.GEOMETRY, 378 TokenType.POINT, 379 TokenType.RING, 380 TokenType.LINESTRING, 381 TokenType.MULTILINESTRING, 382 TokenType.POLYGON, 383 TokenType.MULTIPOLYGON, 384 TokenType.HLLSKETCH, 385 TokenType.HSTORE, 386 TokenType.PSEUDO_TYPE, 387 TokenType.SUPER, 388 TokenType.SERIAL, 389 TokenType.SMALLSERIAL, 390 TokenType.BIGSERIAL, 391 TokenType.XML, 392 TokenType.YEAR, 393 TokenType.USERDEFINED, 394 TokenType.MONEY, 395 TokenType.SMALLMONEY, 396 TokenType.ROWVERSION, 397 TokenType.IMAGE, 398 TokenType.VARIANT, 399 TokenType.VECTOR, 400 
TokenType.OBJECT, 401 TokenType.OBJECT_IDENTIFIER, 402 TokenType.INET, 403 TokenType.IPADDRESS, 404 TokenType.IPPREFIX, 405 TokenType.IPV4, 406 TokenType.IPV6, 407 TokenType.UNKNOWN, 408 TokenType.NULL, 409 TokenType.NAME, 410 TokenType.TDIGEST, 411 TokenType.DYNAMIC, 412 *ENUM_TYPE_TOKENS, 413 *NESTED_TYPE_TOKENS, 414 *AGGREGATE_TYPE_TOKENS, 415 } 416 417 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 418 TokenType.BIGINT: TokenType.UBIGINT, 419 TokenType.INT: TokenType.UINT, 420 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 421 TokenType.SMALLINT: TokenType.USMALLINT, 422 TokenType.TINYINT: TokenType.UTINYINT, 423 TokenType.DECIMAL: TokenType.UDECIMAL, 424 TokenType.DOUBLE: TokenType.UDOUBLE, 425 } 426 427 SUBQUERY_PREDICATES = { 428 TokenType.ANY: exp.Any, 429 TokenType.ALL: exp.All, 430 TokenType.EXISTS: exp.Exists, 431 TokenType.SOME: exp.Any, 432 } 433 434 RESERVED_TOKENS = { 435 *Tokenizer.SINGLE_TOKENS.values(), 436 TokenType.SELECT, 437 } - {TokenType.IDENTIFIER} 438 439 DB_CREATABLES = { 440 TokenType.DATABASE, 441 TokenType.DICTIONARY, 442 TokenType.MODEL, 443 TokenType.NAMESPACE, 444 TokenType.SCHEMA, 445 TokenType.SEQUENCE, 446 TokenType.SINK, 447 TokenType.SOURCE, 448 TokenType.STORAGE_INTEGRATION, 449 TokenType.STREAMLIT, 450 TokenType.TABLE, 451 TokenType.TAG, 452 TokenType.VIEW, 453 TokenType.WAREHOUSE, 454 } 455 456 CREATABLES = { 457 TokenType.COLUMN, 458 TokenType.CONSTRAINT, 459 TokenType.FOREIGN_KEY, 460 TokenType.FUNCTION, 461 TokenType.INDEX, 462 TokenType.PROCEDURE, 463 *DB_CREATABLES, 464 } 465 466 ALTERABLES = { 467 TokenType.INDEX, 468 TokenType.TABLE, 469 TokenType.VIEW, 470 } 471 472 # Tokens that can represent identifiers 473 ID_VAR_TOKENS = { 474 TokenType.ALL, 475 TokenType.ATTACH, 476 TokenType.VAR, 477 TokenType.ANTI, 478 TokenType.APPLY, 479 TokenType.ASC, 480 TokenType.ASOF, 481 TokenType.AUTO_INCREMENT, 482 TokenType.BEGIN, 483 TokenType.BPCHAR, 484 TokenType.CACHE, 485 TokenType.CASE, 486 TokenType.COLLATE, 487 TokenType.COMMAND, 488 TokenType.COMMENT, 489 TokenType.COMMIT, 490 TokenType.CONSTRAINT, 491 TokenType.COPY, 492 TokenType.CUBE, 493 TokenType.CURRENT_SCHEMA, 494 TokenType.DEFAULT, 495 TokenType.DELETE, 496 TokenType.DESC, 497 TokenType.DESCRIBE, 498 TokenType.DETACH, 499 TokenType.DICTIONARY, 500 TokenType.DIV, 501 TokenType.END, 502 TokenType.EXECUTE, 503 TokenType.EXPORT, 504 TokenType.ESCAPE, 505 TokenType.FALSE, 506 TokenType.FIRST, 507 TokenType.FILTER, 508 TokenType.FINAL, 509 TokenType.FORMAT, 510 TokenType.FULL, 511 TokenType.IDENTIFIER, 512 TokenType.IS, 513 TokenType.ISNULL, 514 TokenType.INTERVAL, 515 TokenType.KEEP, 516 TokenType.KILL, 517 TokenType.LEFT, 518 TokenType.LIMIT, 519 TokenType.LOAD, 520 TokenType.MERGE, 521 TokenType.NATURAL, 522 TokenType.NEXT, 523 TokenType.OFFSET, 524 TokenType.OPERATOR, 525 TokenType.ORDINALITY, 526 TokenType.OVERLAPS, 527 TokenType.OVERWRITE, 528 TokenType.PARTITION, 529 TokenType.PERCENT, 530 TokenType.PIVOT, 531 TokenType.PRAGMA, 532 TokenType.PUT, 533 TokenType.RANGE, 534 TokenType.RECURSIVE, 535 TokenType.REFERENCES, 536 TokenType.REFRESH, 537 TokenType.RENAME, 538 TokenType.REPLACE, 539 TokenType.RIGHT, 540 TokenType.ROLLUP, 541 TokenType.ROW, 542 TokenType.ROWS, 543 TokenType.SEMI, 544 TokenType.SET, 545 TokenType.SETTINGS, 546 TokenType.SHOW, 547 TokenType.TEMPORARY, 548 TokenType.TOP, 549 TokenType.TRUE, 550 TokenType.TRUNCATE, 551 TokenType.UNIQUE, 552 TokenType.UNNEST, 553 TokenType.UNPIVOT, 554 TokenType.UPDATE, 555 TokenType.USE, 556 TokenType.VOLATILE, 557 TokenType.WINDOW, 558 *CREATABLES, 559 
*SUBQUERY_PREDICATES, 560 *TYPE_TOKENS, 561 *NO_PAREN_FUNCTIONS, 562 } 563 ID_VAR_TOKENS.remove(TokenType.UNION) 564 565 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 566 TokenType.ANTI, 567 TokenType.APPLY, 568 TokenType.ASOF, 569 TokenType.FULL, 570 TokenType.LEFT, 571 TokenType.LOCK, 572 TokenType.NATURAL, 573 TokenType.RIGHT, 574 TokenType.SEMI, 575 TokenType.WINDOW, 576 } 577 578 ALIAS_TOKENS = ID_VAR_TOKENS 579 580 ARRAY_CONSTRUCTORS = { 581 "ARRAY": exp.Array, 582 "LIST": exp.List, 583 } 584 585 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 586 587 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 588 589 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 590 591 FUNC_TOKENS = { 592 TokenType.COLLATE, 593 TokenType.COMMAND, 594 TokenType.CURRENT_DATE, 595 TokenType.CURRENT_DATETIME, 596 TokenType.CURRENT_SCHEMA, 597 TokenType.CURRENT_TIMESTAMP, 598 TokenType.CURRENT_TIME, 599 TokenType.CURRENT_USER, 600 TokenType.FILTER, 601 TokenType.FIRST, 602 TokenType.FORMAT, 603 TokenType.GLOB, 604 TokenType.IDENTIFIER, 605 TokenType.INDEX, 606 TokenType.ISNULL, 607 TokenType.ILIKE, 608 TokenType.INSERT, 609 TokenType.LIKE, 610 TokenType.MERGE, 611 TokenType.NEXT, 612 TokenType.OFFSET, 613 TokenType.PRIMARY_KEY, 614 TokenType.RANGE, 615 TokenType.REPLACE, 616 TokenType.RLIKE, 617 TokenType.ROW, 618 TokenType.UNNEST, 619 TokenType.VAR, 620 TokenType.LEFT, 621 TokenType.RIGHT, 622 TokenType.SEQUENCE, 623 TokenType.DATE, 624 TokenType.DATETIME, 625 TokenType.TABLE, 626 TokenType.TIMESTAMP, 627 TokenType.TIMESTAMPTZ, 628 TokenType.TRUNCATE, 629 TokenType.WINDOW, 630 TokenType.XOR, 631 *TYPE_TOKENS, 632 *SUBQUERY_PREDICATES, 633 } 634 635 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 636 TokenType.AND: exp.And, 637 } 638 639 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 640 TokenType.COLON_EQ: exp.PropertyEQ, 641 } 642 643 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 644 TokenType.OR: exp.Or, 645 } 646 647 EQUALITY = { 648 TokenType.EQ: exp.EQ, 649 TokenType.NEQ: exp.NEQ, 650 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 651 } 652 653 COMPARISON = { 654 TokenType.GT: exp.GT, 655 TokenType.GTE: exp.GTE, 656 TokenType.LT: exp.LT, 657 TokenType.LTE: exp.LTE, 658 } 659 660 BITWISE = { 661 TokenType.AMP: exp.BitwiseAnd, 662 TokenType.CARET: exp.BitwiseXor, 663 TokenType.PIPE: exp.BitwiseOr, 664 } 665 666 TERM = { 667 TokenType.DASH: exp.Sub, 668 TokenType.PLUS: exp.Add, 669 TokenType.MOD: exp.Mod, 670 TokenType.COLLATE: exp.Collate, 671 } 672 673 FACTOR = { 674 TokenType.DIV: exp.IntDiv, 675 TokenType.LR_ARROW: exp.Distance, 676 TokenType.SLASH: exp.Div, 677 TokenType.STAR: exp.Mul, 678 } 679 680 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 681 682 TIMES = { 683 TokenType.TIME, 684 TokenType.TIMETZ, 685 } 686 687 TIMESTAMPS = { 688 TokenType.TIMESTAMP, 689 TokenType.TIMESTAMPNTZ, 690 TokenType.TIMESTAMPTZ, 691 TokenType.TIMESTAMPLTZ, 692 *TIMES, 693 } 694 695 SET_OPERATIONS = { 696 TokenType.UNION, 697 TokenType.INTERSECT, 698 TokenType.EXCEPT, 699 } 700 701 JOIN_METHODS = { 702 TokenType.ASOF, 703 TokenType.NATURAL, 704 TokenType.POSITIONAL, 705 } 706 707 JOIN_SIDES = { 708 TokenType.LEFT, 709 TokenType.RIGHT, 710 TokenType.FULL, 711 } 712 713 JOIN_KINDS = { 714 TokenType.ANTI, 715 TokenType.CROSS, 716 TokenType.INNER, 717 TokenType.OUTER, 718 TokenType.SEMI, 719 TokenType.STRAIGHT_JOIN, 720 } 721 722 JOIN_HINTS: t.Set[str] = set() 723 724 LAMBDAS = { 725 TokenType.ARROW: lambda self, expressions: self.expression( 726 exp.Lambda, 727 
this=self._replace_lambda( 728 self._parse_assignment(), 729 expressions, 730 ), 731 expressions=expressions, 732 ), 733 TokenType.FARROW: lambda self, expressions: self.expression( 734 exp.Kwarg, 735 this=exp.var(expressions[0].name), 736 expression=self._parse_assignment(), 737 ), 738 } 739 740 COLUMN_OPERATORS = { 741 TokenType.DOT: None, 742 TokenType.DOTCOLON: lambda self, this, to: self.expression( 743 exp.JSONCast, 744 this=this, 745 to=to, 746 ), 747 TokenType.DCOLON: lambda self, this, to: self.expression( 748 exp.Cast if self.STRICT_CAST else exp.TryCast, 749 this=this, 750 to=to, 751 ), 752 TokenType.ARROW: lambda self, this, path: self.expression( 753 exp.JSONExtract, 754 this=this, 755 expression=self.dialect.to_json_path(path), 756 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 757 ), 758 TokenType.DARROW: lambda self, this, path: self.expression( 759 exp.JSONExtractScalar, 760 this=this, 761 expression=self.dialect.to_json_path(path), 762 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 763 ), 764 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 765 exp.JSONBExtract, 766 this=this, 767 expression=path, 768 ), 769 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 770 exp.JSONBExtractScalar, 771 this=this, 772 expression=path, 773 ), 774 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 775 exp.JSONBContains, 776 this=this, 777 expression=key, 778 ), 779 } 780 781 EXPRESSION_PARSERS = { 782 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 783 exp.Column: lambda self: self._parse_column(), 784 exp.Condition: lambda self: self._parse_assignment(), 785 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 786 exp.Expression: lambda self: self._parse_expression(), 787 exp.From: lambda self: self._parse_from(joins=True), 788 exp.Group: lambda self: self._parse_group(), 789 exp.Having: lambda self: self._parse_having(), 790 exp.Hint: lambda self: self._parse_hint_body(), 791 exp.Identifier: lambda self: self._parse_id_var(), 792 exp.Join: lambda self: self._parse_join(), 793 exp.Lambda: lambda self: self._parse_lambda(), 794 exp.Lateral: lambda self: self._parse_lateral(), 795 exp.Limit: lambda self: self._parse_limit(), 796 exp.Offset: lambda self: self._parse_offset(), 797 exp.Order: lambda self: self._parse_order(), 798 exp.Ordered: lambda self: self._parse_ordered(), 799 exp.Properties: lambda self: self._parse_properties(), 800 exp.Qualify: lambda self: self._parse_qualify(), 801 exp.Returning: lambda self: self._parse_returning(), 802 exp.Select: lambda self: self._parse_select(), 803 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 804 exp.Table: lambda self: self._parse_table_parts(), 805 exp.TableAlias: lambda self: self._parse_table_alias(), 806 exp.Tuple: lambda self: self._parse_value(), 807 exp.Whens: lambda self: self._parse_when_matched(), 808 exp.Where: lambda self: self._parse_where(), 809 exp.Window: lambda self: self._parse_named_window(), 810 exp.With: lambda self: self._parse_with(), 811 "JOIN_TYPE": lambda self: self._parse_join_parts(), 812 } 813 814 STATEMENT_PARSERS = { 815 TokenType.ALTER: lambda self: self._parse_alter(), 816 TokenType.ANALYZE: lambda self: self._parse_analyze(), 817 TokenType.BEGIN: lambda self: self._parse_transaction(), 818 TokenType.CACHE: lambda self: self._parse_cache(), 819 TokenType.COMMENT: lambda self: self._parse_comment(), 820 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 821 
TokenType.COPY: lambda self: self._parse_copy(), 822 TokenType.CREATE: lambda self: self._parse_create(), 823 TokenType.DELETE: lambda self: self._parse_delete(), 824 TokenType.DESC: lambda self: self._parse_describe(), 825 TokenType.DESCRIBE: lambda self: self._parse_describe(), 826 TokenType.DROP: lambda self: self._parse_drop(), 827 TokenType.GRANT: lambda self: self._parse_grant(), 828 TokenType.INSERT: lambda self: self._parse_insert(), 829 TokenType.KILL: lambda self: self._parse_kill(), 830 TokenType.LOAD: lambda self: self._parse_load(), 831 TokenType.MERGE: lambda self: self._parse_merge(), 832 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 833 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 834 TokenType.REFRESH: lambda self: self._parse_refresh(), 835 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 836 TokenType.SET: lambda self: self._parse_set(), 837 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 838 TokenType.UNCACHE: lambda self: self._parse_uncache(), 839 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 840 TokenType.UPDATE: lambda self: self._parse_update(), 841 TokenType.USE: lambda self: self._parse_use(), 842 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 843 } 844 845 UNARY_PARSERS = { 846 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 847 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 848 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 849 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 850 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 851 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 852 } 853 854 STRING_PARSERS = { 855 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 856 exp.RawString, this=token.text 857 ), 858 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 859 exp.National, this=token.text 860 ), 861 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 862 TokenType.STRING: lambda self, token: self.expression( 863 exp.Literal, this=token.text, is_string=True 864 ), 865 TokenType.UNICODE_STRING: lambda self, token: self.expression( 866 exp.UnicodeString, 867 this=token.text, 868 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 869 ), 870 } 871 872 NUMERIC_PARSERS = { 873 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 874 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 875 TokenType.HEX_STRING: lambda self, token: self.expression( 876 exp.HexString, 877 this=token.text, 878 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 879 ), 880 TokenType.NUMBER: lambda self, token: self.expression( 881 exp.Literal, this=token.text, is_string=False 882 ), 883 } 884 885 PRIMARY_PARSERS = { 886 **STRING_PARSERS, 887 **NUMERIC_PARSERS, 888 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 889 TokenType.NULL: lambda self, _: self.expression(exp.Null), 890 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 891 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 892 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 893 TokenType.STAR: lambda self, _: 
self._parse_star_ops(), 894 } 895 896 PLACEHOLDER_PARSERS = { 897 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 898 TokenType.PARAMETER: lambda self: self._parse_parameter(), 899 TokenType.COLON: lambda self: ( 900 self.expression(exp.Placeholder, this=self._prev.text) 901 if self._match_set(self.ID_VAR_TOKENS) 902 else None 903 ), 904 } 905 906 RANGE_PARSERS = { 907 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 908 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 909 TokenType.GLOB: binary_range_parser(exp.Glob), 910 TokenType.ILIKE: binary_range_parser(exp.ILike), 911 TokenType.IN: lambda self, this: self._parse_in(this), 912 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 913 TokenType.IS: lambda self, this: self._parse_is(this), 914 TokenType.LIKE: binary_range_parser(exp.Like), 915 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 916 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 917 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 918 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 919 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 920 } 921 922 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 923 "ALLOWED_VALUES": lambda self: self.expression( 924 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 925 ), 926 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 927 "AUTO": lambda self: self._parse_auto_property(), 928 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 929 "BACKUP": lambda self: self.expression( 930 exp.BackupProperty, this=self._parse_var(any_token=True) 931 ), 932 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 933 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 934 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 935 "CHECKSUM": lambda self: self._parse_checksum(), 936 "CLUSTER BY": lambda self: self._parse_cluster(), 937 "CLUSTERED": lambda self: self._parse_clustered_by(), 938 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 939 exp.CollateProperty, **kwargs 940 ), 941 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 942 "CONTAINS": lambda self: self._parse_contains_property(), 943 "COPY": lambda self: self._parse_copy_property(), 944 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 945 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 946 "DEFINER": lambda self: self._parse_definer(), 947 "DETERMINISTIC": lambda self: self.expression( 948 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 949 ), 950 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 951 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 952 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 953 "DISTKEY": lambda self: self._parse_distkey(), 954 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 955 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 956 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 957 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 958 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 959 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 960 "FORMAT": lambda self: 
self._parse_property_assignment(exp.FileFormatProperty), 961 "FREESPACE": lambda self: self._parse_freespace(), 962 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 963 "HEAP": lambda self: self.expression(exp.HeapProperty), 964 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 965 "IMMUTABLE": lambda self: self.expression( 966 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 967 ), 968 "INHERITS": lambda self: self.expression( 969 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 970 ), 971 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 972 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 973 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 974 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 975 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 976 "LIKE": lambda self: self._parse_create_like(), 977 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 978 "LOCK": lambda self: self._parse_locking(), 979 "LOCKING": lambda self: self._parse_locking(), 980 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 981 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 982 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 983 "MODIFIES": lambda self: self._parse_modifies_property(), 984 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 985 "NO": lambda self: self._parse_no_property(), 986 "ON": lambda self: self._parse_on_property(), 987 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 988 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 989 "PARTITION": lambda self: self._parse_partitioned_of(), 990 "PARTITION BY": lambda self: self._parse_partitioned_by(), 991 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 992 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 993 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 994 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 995 "READS": lambda self: self._parse_reads_property(), 996 "REMOTE": lambda self: self._parse_remote_with_connection(), 997 "RETURNS": lambda self: self._parse_returns(), 998 "STRICT": lambda self: self.expression(exp.StrictProperty), 999 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1000 "ROW": lambda self: self._parse_row(), 1001 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1002 "SAMPLE": lambda self: self.expression( 1003 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1004 ), 1005 "SECURE": lambda self: self.expression(exp.SecureProperty), 1006 "SECURITY": lambda self: self._parse_security(), 1007 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1008 "SETTINGS": lambda self: self._parse_settings_property(), 1009 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1010 "SORTKEY": lambda self: self._parse_sortkey(), 1011 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1012 "STABLE": lambda self: self.expression( 1013 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1014 ), 1015 "STORED": lambda self: self._parse_stored(), 1016 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1017 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1018 
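# Editor's note (illustrative aside, not part of the original source): these keys
# are matched as raw keyword text while DDL properties are being parsed, and each
# callable builds one exp.Property subclass. A hedged sketch; the exact tree
# shapes can vary by dialect:
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> ddl = sqlglot.parse_one(
#     ...     "CREATE TABLE t (c INT) ENGINE=InnoDB AUTO_INCREMENT=42",
#     ...     read="mysql",
#     ... )
#     >>> sorted(p.__class__.__name__ for p in ddl.find_all(exp.Property))
#     ['AutoIncrementProperty', 'EngineProperty']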
"TEMP": lambda self: self.expression(exp.TemporaryProperty), 1019 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1020 "TO": lambda self: self._parse_to_table(), 1021 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1022 "TRANSFORM": lambda self: self.expression( 1023 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1024 ), 1025 "TTL": lambda self: self._parse_ttl(), 1026 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1027 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1028 "VOLATILE": lambda self: self._parse_volatile_property(), 1029 "WITH": lambda self: self._parse_with_property(), 1030 } 1031 1032 CONSTRAINT_PARSERS = { 1033 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1034 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1035 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1036 "CHARACTER SET": lambda self: self.expression( 1037 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1038 ), 1039 "CHECK": lambda self: self.expression( 1040 exp.CheckColumnConstraint, 1041 this=self._parse_wrapped(self._parse_assignment), 1042 enforced=self._match_text_seq("ENFORCED"), 1043 ), 1044 "COLLATE": lambda self: self.expression( 1045 exp.CollateColumnConstraint, 1046 this=self._parse_identifier() or self._parse_column(), 1047 ), 1048 "COMMENT": lambda self: self.expression( 1049 exp.CommentColumnConstraint, this=self._parse_string() 1050 ), 1051 "COMPRESS": lambda self: self._parse_compress(), 1052 "CLUSTERED": lambda self: self.expression( 1053 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1054 ), 1055 "NONCLUSTERED": lambda self: self.expression( 1056 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1057 ), 1058 "DEFAULT": lambda self: self.expression( 1059 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1060 ), 1061 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1062 "EPHEMERAL": lambda self: self.expression( 1063 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1064 ), 1065 "EXCLUDE": lambda self: self.expression( 1066 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1067 ), 1068 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1069 "FORMAT": lambda self: self.expression( 1070 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1071 ), 1072 "GENERATED": lambda self: self._parse_generated_as_identity(), 1073 "IDENTITY": lambda self: self._parse_auto_increment(), 1074 "INLINE": lambda self: self._parse_inline(), 1075 "LIKE": lambda self: self._parse_create_like(), 1076 "NOT": lambda self: self._parse_not_constraint(), 1077 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1078 "ON": lambda self: ( 1079 self._match(TokenType.UPDATE) 1080 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1081 ) 1082 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1083 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1084 "PERIOD": lambda self: self._parse_period_for_system_time(), 1085 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1086 "REFERENCES": lambda self: self._parse_references(match=False), 1087 "TITLE": lambda self: self.expression( 1088 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1089 ), 1090 
"TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1091 "UNIQUE": lambda self: self._parse_unique(), 1092 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1093 "WATERMARK": lambda self: self.expression( 1094 exp.WatermarkColumnConstraint, 1095 this=self._match(TokenType.FOR) and self._parse_column(), 1096 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1097 ), 1098 "WITH": lambda self: self.expression( 1099 exp.Properties, expressions=self._parse_wrapped_properties() 1100 ), 1101 } 1102 1103 ALTER_PARSERS = { 1104 "ADD": lambda self: self._parse_alter_table_add(), 1105 "AS": lambda self: self._parse_select(), 1106 "ALTER": lambda self: self._parse_alter_table_alter(), 1107 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1108 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1109 "DROP": lambda self: self._parse_alter_table_drop(), 1110 "RENAME": lambda self: self._parse_alter_table_rename(), 1111 "SET": lambda self: self._parse_alter_table_set(), 1112 "SWAP": lambda self: self.expression( 1113 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1114 ), 1115 } 1116 1117 ALTER_ALTER_PARSERS = { 1118 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1119 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1120 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1121 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1122 } 1123 1124 SCHEMA_UNNAMED_CONSTRAINTS = { 1125 "CHECK", 1126 "EXCLUDE", 1127 "FOREIGN KEY", 1128 "LIKE", 1129 "PERIOD", 1130 "PRIMARY KEY", 1131 "UNIQUE", 1132 "WATERMARK", 1133 } 1134 1135 NO_PAREN_FUNCTION_PARSERS = { 1136 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1137 "CASE": lambda self: self._parse_case(), 1138 "CONNECT_BY_ROOT": lambda self: self.expression( 1139 exp.ConnectByRoot, this=self._parse_column() 1140 ), 1141 "IF": lambda self: self._parse_if(), 1142 } 1143 1144 INVALID_FUNC_NAME_TOKENS = { 1145 TokenType.IDENTIFIER, 1146 TokenType.STRING, 1147 } 1148 1149 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1150 1151 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1152 1153 FUNCTION_PARSERS = { 1154 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1155 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1156 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1157 "DECODE": lambda self: self._parse_decode(), 1158 "EXTRACT": lambda self: self._parse_extract(), 1159 "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1160 "GAP_FILL": lambda self: self._parse_gap_fill(), 1161 "JSON_OBJECT": lambda self: self._parse_json_object(), 1162 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1163 "JSON_TABLE": lambda self: self._parse_json_table(), 1164 "MATCH": lambda self: self._parse_match_against(), 1165 "NORMALIZE": lambda self: self._parse_normalize(), 1166 "OPENJSON": lambda self: self._parse_open_json(), 1167 "OVERLAY": lambda self: self._parse_overlay(), 1168 "POSITION": lambda self: self._parse_position(), 1169 "PREDICT": lambda self: self._parse_predict(), 1170 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1171 "STRING_AGG": lambda self: self._parse_string_agg(), 1172 "SUBSTRING": lambda self: self._parse_substring(), 1173 "TRIM": lambda self: self._parse_trim(), 1174 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1175 "TRY_CONVERT": lambda self: 
self._parse_convert(False, safe=True), 1176 "XMLELEMENT": lambda self: self.expression( 1177 exp.XMLElement, 1178 this=self._match_text_seq("NAME") and self._parse_id_var(), 1179 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1180 ), 1181 "XMLTABLE": lambda self: self._parse_xml_table(), 1182 } 1183 1184 QUERY_MODIFIER_PARSERS = { 1185 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1186 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1187 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1188 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1189 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1190 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1191 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1192 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1193 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1194 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1195 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1196 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1197 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1198 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1199 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1200 TokenType.CLUSTER_BY: lambda self: ( 1201 "cluster", 1202 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1203 ), 1204 TokenType.DISTRIBUTE_BY: lambda self: ( 1205 "distribute", 1206 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1207 ), 1208 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1209 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1210 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1211 } 1212 1213 SET_PARSERS = { 1214 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1215 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1216 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1217 "TRANSACTION": lambda self: self._parse_set_transaction(), 1218 } 1219 1220 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1221 1222 TYPE_LITERAL_PARSERS = { 1223 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1224 } 1225 1226 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1227 1228 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1229 1230 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1231 1232 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1233 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1234 "ISOLATION": ( 1235 ("LEVEL", "REPEATABLE", "READ"), 1236 ("LEVEL", "READ", "COMMITTED"), 1237 ("LEVEL", "READ", "UNCOMMITTED"), 1238 ("LEVEL", "SERIALIZABLE"), 1239 ), 1240 "READ": ("WRITE", "ONLY"), 1241 } 1242 1243 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1244 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1245 ) 1246 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1247 1248 CREATE_SEQUENCE: OPTIONS_TYPE = { 1249 "SCALE": ("EXTEND", "NOEXTEND"), 1250 "SHARD": ("EXTEND", "NOEXTEND"), 1251 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1252 **dict.fromkeys( 1253 ( 1254 "SESSION", 1255 "GLOBAL", 1256
"KEEP", 1257 "NOKEEP", 1258 "ORDER", 1259 "NOORDER", 1260 "NOCACHE", 1261 "CYCLE", 1262 "NOCYCLE", 1263 "NOMINVALUE", 1264 "NOMAXVALUE", 1265 "NOSCALE", 1266 "NOSHARD", 1267 ), 1268 tuple(), 1269 ), 1270 } 1271 1272 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1273 1274 USABLES: OPTIONS_TYPE = dict.fromkeys( 1275 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1276 ) 1277 1278 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1279 1280 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1281 "TYPE": ("EVOLUTION",), 1282 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1283 } 1284 1285 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1286 1287 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1288 1289 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1290 "NOT": ("ENFORCED",), 1291 "MATCH": ( 1292 "FULL", 1293 "PARTIAL", 1294 "SIMPLE", 1295 ), 1296 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1297 "USING": ( 1298 "BTREE", 1299 "HASH", 1300 ), 1301 **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()), 1302 } 1303 1304 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1305 1306 CLONE_KEYWORDS = {"CLONE", "COPY"} 1307 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1308 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1309 1310 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1311 1312 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1313 1314 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1315 1316 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1317 1318 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1319 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1320 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1321 1322 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1323 1324 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1325 1326 ADD_CONSTRAINT_TOKENS = { 1327 TokenType.CONSTRAINT, 1328 TokenType.FOREIGN_KEY, 1329 TokenType.INDEX, 1330 TokenType.KEY, 1331 TokenType.PRIMARY_KEY, 1332 TokenType.UNIQUE, 1333 } 1334 1335 DISTINCT_TOKENS = {TokenType.DISTINCT} 1336 1337 NULL_TOKENS = {TokenType.NULL} 1338 1339 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1340 1341 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1342 1343 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1344 1345 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1346 1347 ODBC_DATETIME_LITERALS = { 1348 "d": exp.Date, 1349 "t": exp.Time, 1350 "ts": exp.Timestamp, 1351 } 1352 1353 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1354 1355 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1356 1357 # The style options for the DESCRIBE statement 1358 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1359 1360 # The style options for the ANALYZE statement 1361 ANALYZE_STYLES = { 1362 "BUFFER_USAGE_LIMIT", 1363 "FULL", 1364 "LOCAL", 1365 "NO_WRITE_TO_BINLOG", 1366 "SAMPLE", 1367 "SKIP_LOCKED", 1368 "VERBOSE", 1369 } 1370 1371 ANALYZE_EXPRESSION_PARSERS = { 1372 "ALL": lambda self: self._parse_analyze_columns(), 1373 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1374 "DELETE": lambda self: self._parse_analyze_delete(), 1375 "DROP": lambda self: self._parse_analyze_histogram(), 1376 "ESTIMATE": lambda 
self: self._parse_analyze_statistics(), 1377 "LIST": lambda self: self._parse_analyze_list(), 1378 "PREDICATE": lambda self: self._parse_analyze_columns(), 1379 "UPDATE": lambda self: self._parse_analyze_histogram(), 1380 "VALIDATE": lambda self: self._parse_analyze_validate(), 1381 } 1382 1383 PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"} 1384 1385 AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET) 1386 1387 OPERATION_MODIFIERS: t.Set[str] = set() 1388 1389 RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"} 1390 1391 MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows) 1392 1393 STRICT_CAST = True 1394 1395 PREFIXED_PIVOT_COLUMNS = False 1396 IDENTIFY_PIVOT_STRINGS = False 1397 1398 LOG_DEFAULTS_TO_LN = False 1399 1400 # Whether ADD is present for each column added by ALTER TABLE 1401 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1402 1403 # Whether the table sample clause expects CSV syntax 1404 TABLESAMPLE_CSV = False 1405 1406 # The default method used for table sampling 1407 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1408 1409 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1410 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1411 1412 # Whether the TRIM function expects the characters to trim as its first argument 1413 TRIM_PATTERN_FIRST = False 1414 1415 # Whether string aliases are supported, e.g. `SELECT COUNT(*) 'count'` 1416 STRING_ALIASES = False 1417 1418 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1419 MODIFIERS_ATTACHED_TO_SET_OP = True 1420 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1421 1422 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1423 NO_PAREN_IF_COMMANDS = True 1424 1425 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1426 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1427 1428 # Whether the `:` operator is used to extract a value from a VARIANT column 1429 COLON_IS_VARIANT_EXTRACT = False 1430 1431 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1432 # If this is True and '(' is not found, the keyword will be treated as an identifier 1433 VALUES_FOLLOWED_BY_PAREN = True 1434
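# Editor's sketch (not part of the original source; the dialect name below is
# hypothetical): these class-level flags are the knobs dialects use to tune
# parser behavior, overridden on the nested Parser class of a Dialect subclass:
#
#     from sqlglot import parser
#     from sqlglot.dialects.dialect import Dialect
#
#     class MyDialect(Dialect):  # hypothetical, for illustration only
#         class Parser(parser.Parser):
#             # x::y builds exp.TryCast instead of exp.Cast (see COLUMN_OPERATORS)
#             STRICT_CAST = False
#             # a single-argument LOG(x) is parsed as the natural logarithm
#             LOG_DEFAULTS_TO_LN = True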
1435 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1436 SUPPORTS_IMPLICIT_UNNEST = False 1437 1438 # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTH 1439 INTERVAL_SPANS = True 1440 1441 # Whether a PARTITION clause can follow a table reference 1442 SUPPORTS_PARTITION_SELECTION = False 1443 1444 # Whether the `name AS expr` schema/column constraint requires parentheses around `expr` 1445 WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True 1446 1447 # Whether the 'AS' keyword is optional in the CTE definition syntax 1448 OPTIONAL_ALIAS_TOKEN_CTE = True 1449 1450 __slots__ = ( 1451 "error_level", 1452 "error_message_context", 1453 "max_errors", 1454 "dialect", 1455 "sql", 1456 "errors", 1457 "_tokens", 1458 "_index", 1459 "_curr", 1460 "_next", 1461 "_prev", 1462 "_prev_comments", 1463 ) 1464 1465 # Autofilled 1466 SHOW_TRIE: t.Dict = {} 1467 SET_TRIE: t.Dict = {} 1468 1469 def __init__( 1470 self, 1471 error_level: t.Optional[ErrorLevel] = None, 1472 error_message_context: int = 100, 1473 max_errors: int = 3, 1474 dialect: DialectType = None, 1475 ): 1476 from sqlglot.dialects import Dialect 1477 1478 self.error_level = error_level or ErrorLevel.IMMEDIATE 1479 self.error_message_context = error_message_context 1480 self.max_errors = max_errors 1481 self.dialect = Dialect.get_or_raise(dialect) 1482 self.reset() 1483 1484 def reset(self): 1485 self.sql = "" 1486 self.errors = [] 1487 self._tokens = [] 1488 self._index = 0 1489 self._curr = None 1490 self._next = None 1491 self._prev = None 1492 self._prev_comments = None 1493 1494 def parse( 1495 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1496 ) -> t.List[t.Optional[exp.Expression]]: 1497 """ 1498 Parses a list of tokens and returns a list of syntax trees, one tree 1499 per parsed SQL statement. 1500 1501 Args: 1502 raw_tokens: The list of tokens. 1503 sql: The original SQL string, used to produce helpful debug messages. 1504 1505 Returns: 1506 The list of the produced syntax trees. 1507 """ 1508 return self._parse( 1509 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1510 ) 1511 1512 def parse_into( 1513 self, 1514 expression_types: exp.IntoType, 1515 raw_tokens: t.List[Token], 1516 sql: t.Optional[str] = None, 1517 ) -> t.List[t.Optional[exp.Expression]]: 1518 """ 1519 Parses a list of tokens into a given Expression type. If a collection of Expression 1520 types is given instead, this method will try to parse the token list into each one 1521 of them, stopping at the first for which the parsing succeeds. 1522 1523 Args: 1524 expression_types: The expression type(s) to try and parse the token list into. 1525 raw_tokens: The list of tokens. 1526 sql: The original SQL string, used to produce helpful debug messages. 1527 1528 Returns: 1529 The target Expression. 1530 """ 1531 errors = [] 1532 for expression_type in ensure_list(expression_types): 1533 parser = self.EXPRESSION_PARSERS.get(expression_type) 1534 if not parser: 1535 raise TypeError(f"No parser registered for {expression_type}") 1536 1537 try: 1538 return self._parse(parser, raw_tokens, sql) 1539 except ParseError as e: 1540 e.errors[0]["into_expression"] = expression_type 1541 errors.append(e) 1542 1543 raise ParseError( 1544 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1545 errors=merge_errors(errors), 1546 ) from errors[-1] 1547 1548 def _parse( 1549 self, 1550 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1551 raw_tokens: t.List[Token], 1552 sql: t.Optional[str] = None, 1553 ) -> t.List[t.Optional[exp.Expression]]: 1554 self.reset() 1555 self.sql = sql or "" 1556 1557 total = len(raw_tokens) 1558 chunks: t.List[t.List[Token]] = [[]] 1559 1560 for i, token in enumerate(raw_tokens): 1561 if token.token_type == TokenType.SEMICOLON: 1562 if token.comments: 1563 chunks.append([token]) 1564 1565 if i < total - 1: 1566 chunks.append([]) 1567 else: 1568 chunks[-1].append(token) 1569 1570 expressions = [] 1571 1572 for tokens in chunks: 1573 self._index = -1 1574 self._tokens = tokens 1575 self._advance() 1576 1577 expressions.append(parse_method(self)) 1578 1579 if self._index < len(self._tokens): 1580 self.raise_error("Invalid expression / Unexpected token") 1581 1582 self.check_errors() 1583 1584 return expressions 1585 1586 def check_errors(self) -> None: 1587 """Logs or raises any found errors, depending on the chosen error level setting.""" 1588 if self.error_level == ErrorLevel.WARN: 1589 for error in self.errors: 1590 logger.error(str(error)) 1591 elif self.error_level == ErrorLevel.RAISE and self.errors: 1592 raise ParseError( 1593 concat_messages(self.errors, self.max_errors), 1594 errors=merge_errors(self.errors), 1595 ) 1596 1597 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1598 """ 1599 Appends an error in the list of recorded errors or raises it, depending on the chosen 1600 error level setting. 1601 """ 1602 token = token or self._curr or self._prev or Token.string("") 1603 start = token.start 1604 end = token.end + 1 1605 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1606 highlight = self.sql[start:end] 1607 end_context = self.sql[end : end + self.error_message_context] 1608 1609 error = ParseError.new( 1610 f"{message}. Line {token.line}, Col: {token.col}.\n" 1611 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1612 description=message, 1613 line=token.line, 1614 col=token.col, 1615 start_context=start_context, 1616 highlight=highlight, 1617 end_context=end_context, 1618 ) 1619 1620 if self.error_level == ErrorLevel.IMMEDIATE: 1621 raise error 1622 1623 self.errors.append(error) 1624 1625 def expression( 1626 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1627 ) -> E: 1628 """ 1629 Creates a new, validated Expression. 1630 1631 Args: 1632 exp_class: The expression class to instantiate. 1633 comments: An optional list of comments to attach to the expression. 1634 kwargs: The arguments to set for the expression along with their respective values. 1635 1636 Returns: 1637 The target expression. """ 1638 instance = exp_class(**kwargs) 1639 instance.add_comments(comments) if comments else self._add_comments(instance) 1640 return self.validate_expression(instance) 1641 1642 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1643 if expression and self._prev_comments: 1644 expression.add_comments(self._prev_comments) 1645 self._prev_comments = None 1646 1647 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1648 """ 1649 Validates an Expression, making sure that all its mandatory arguments are set. 1650 1651 Args: 1652 expression: The expression to validate. 1653 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1654 1655 Returns: 1656 The validated expression. 1657 """ 1658 if self.error_level != ErrorLevel.IGNORE: 1659 for error_message in expression.error_messages(args): 1660 self.raise_error(error_message) 1661 1662 return expression 1663 1664 def _find_sql(self, start: Token, end: Token) -> str: 1665 return self.sql[start.start : end.end + 1] 1666 1667 def _is_connected(self) -> bool: 1668 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1669 1670 def _advance(self, times: int = 1) -> None: 1671 self._index += times 1672 self._curr = seq_get(self._tokens, self._index) 1673 self._next = seq_get(self._tokens, self._index + 1) 1674 1675 if self._index > 0: 1676 self._prev = self._tokens[self._index - 1] 1677 self._prev_comments = self._prev.comments 1678 else: 1679 self._prev = None 1680 self._prev_comments = None 1681 1682 def _retreat(self, index: int) -> None: 1683 if index != self._index: 1684 self._advance(index - self._index) 1685 1686 def _warn_unsupported(self) -> None: 1687 if len(self._tokens) <= 1: 1688 return 1689 1690 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1691 # interested in emitting a warning for the one being currently processed. 1692 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1693 1694 logger.warning( 1695 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1696 ) 1697 1698 def _parse_command(self) -> exp.Command: 1699 self._warn_unsupported() 1700 return self.expression( 1701 exp.Command, 1702 comments=self._prev_comments, 1703 this=self._prev.text.upper(), 1704 expression=self._parse_string(), 1705 ) 1706 1707 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1708 """ 1709 Attempts to backtrack if a parse function that contains a try/catch internally raises an error. 1711 This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to 1712 solve this by setting & resetting the parser state accordingly. 1713 """ 1714 index = self._index 1715 error_level = self.error_level 1716 1717 self.error_level = ErrorLevel.IMMEDIATE 1718 try: 1719 this = parse_method() 1720 except ParseError: 1721 this = None 1722 finally: 1723 if not this or retreat: 1724 self._retreat(index) 1725 self.error_level = error_level 1726 1727 return this 1728 1729 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1730 start = self._prev 1731 exists = self._parse_exists() if allow_exists else None 1732 1733 self._match(TokenType.ON) 1734 1735 materialized = self._match_text_seq("MATERIALIZED") 1736 kind = self._match_set(self.CREATABLES) and self._prev 1737 if not kind: 1738 return self._parse_as_command(start) 1739 1740 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1741 this = self._parse_user_defined_function(kind=kind.token_type) 1742 elif kind.token_type == TokenType.TABLE: 1743 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1744 elif kind.token_type == TokenType.COLUMN: 1745 this = self._parse_column() 1746 else: 1747 this = self._parse_id_var() 1748 1749 self._match(TokenType.IS) 1750 1751 return self.expression( 1752 exp.Comment, 1753 this=this, 1754 kind=kind.text, 1755 expression=self._parse_string(), 1756 exists=exists, 1757 materialized=materialized, 1758 ) 1759 1760 def _parse_to_table( 1761 self, 1762 ) -> exp.ToTableProperty: 1763 table = self._parse_table_parts(schema=True) 1764 return self.expression(exp.ToTableProperty, this=table) 1765 1766 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1767 def _parse_ttl(self) -> exp.Expression: 1768 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1769 this = self._parse_bitwise() 1770 1771 if self._match_text_seq("DELETE"): 1772 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1773 if self._match_text_seq("RECOMPRESS"): 1774 return self.expression( 1775 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1776 ) 1777 if self._match_text_seq("TO", "DISK"): 1778 return self.expression( 1779 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1780 ) 1781 if self._match_text_seq("TO", "VOLUME"): 1782 return self.expression( 1783 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1784 ) 1785 1786 return this 1787 1788 expressions = self._parse_csv(_parse_ttl_action) 1789 where = self._parse_where() 1790 group = self._parse_group() 1791 1792 aggregates = None 1793 if group and self._match(TokenType.SET): 1794 aggregates = self._parse_csv(self._parse_set_item) 1795 1796 return self.expression( 1797 exp.MergeTreeTTL, 1798 expressions=expressions, 1799 where=where, 1800 group=group, 1801 aggregates=aggregates, 1802 ) 1803 1804 def _parse_statement(self) -> t.Optional[exp.Expression]: 1805 if self._curr is None: 1806 return None 1807 1808 if self._match_set(self.STATEMENT_PARSERS): 1809 comments = self._prev_comments 1810 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 1811 stmt.add_comments(comments, prepend=True) 1812 return stmt 1813 1814 if self._match_set(self.dialect.tokenizer.COMMANDS): 1815 return self._parse_command() 1816 1817 expression = self._parse_expression() 1818 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1819 return self._parse_query_modifiers(expression)
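# Editor's note (illustrative aside, not part of the original source):
# _parse_statement() above is the parse method applied to every semicolon-
# separated chunk, so the public helpers return one syntax tree per statement,
# falling back to exp.Command for syntax the dialect treats as a command:
#
#     >>> import sqlglot
#     >>> [e.__class__.__name__ for e in sqlglot.parse("SELECT 1; DROP TABLE t")]
#     ['Select', 'Drop']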
1820 1821 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1822 start = self._prev 1823 temporary = self._match(TokenType.TEMPORARY) 1824 materialized = self._match_text_seq("MATERIALIZED") 1825 1826 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1827 if not kind: 1828 return self._parse_as_command(start) 1829 1830 concurrently = self._match_text_seq("CONCURRENTLY") 1831 if_exists = exists or self._parse_exists() 1832 1833 if kind == "COLUMN": 1834 this = self._parse_column() 1835 else: 1836 this = self._parse_table_parts( 1837 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1838 ) 1839 1840 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1841 1842 if self._match(TokenType.L_PAREN, advance=False): 1843 expressions = self._parse_wrapped_csv(self._parse_types) 1844 else: 1845 expressions = None 1846 1847 return self.expression( 1848 exp.Drop, 1849 exists=if_exists, 1850 this=this, 1851 expressions=expressions, 1852 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1853 temporary=temporary, 1854 materialized=materialized, 1855 cascade=self._match_text_seq("CASCADE"), 1856 constraints=self._match_text_seq("CONSTRAINTS"), 1857 purge=self._match_text_seq("PURGE"), 1858 cluster=cluster, 1859 concurrently=concurrently, 1860 ) 1861 1862 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1863 return ( 1864 self._match_text_seq("IF") 1865 and (not not_ or self._match(TokenType.NOT)) 1866 and self._match(TokenType.EXISTS) 1867 ) 1868 1869 def _parse_create(self) -> exp.Create | exp.Command: 1870 # Note: this can't be None because we've matched a statement parser 1871 start = self._prev 1872 1873 replace = ( 1874 start.token_type == TokenType.REPLACE 1875 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1876 or self._match_pair(TokenType.OR, TokenType.ALTER) 1877 ) 1878 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1879 1880 unique = self._match(TokenType.UNIQUE) 1881 1882 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1883 clustered = True 1884 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1885 "COLUMNSTORE" 1886 ): 1887 clustered = False 1888 else: 1889 clustered = None 1890 1891 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1892 self._advance() 1893 1894 properties = None 1895 create_token = self._match_set(self.CREATABLES) and self._prev 1896 1897 if not create_token: 1898 # exp.Properties.Location.POST_CREATE 1899 properties = self._parse_properties() 1900 create_token = self._match_set(self.CREATABLES) and self._prev 1901 1902 if not properties or not create_token: 1903 return self._parse_as_command(start) 1904 1905 concurrently = self._match_text_seq("CONCURRENTLY") 1906 exists = self._parse_exists(not_=True) 1907 this = None 1908 expression: t.Optional[exp.Expression] = None 1909 indexes = None 1910 no_schema_binding = None 1911 begin = None 1912 end = None 1913 clone = None 1914 1915 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1916 nonlocal properties 1917 if properties and temp_props: 1918 properties.expressions.extend(temp_props.expressions) 1919 elif temp_props: 1920 properties = temp_props 1921 1922 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1923 this = self._parse_user_defined_function(kind=create_token.token_type) 1924 1925 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1926 
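# Editor's note (illustrative aside, not part of the original source):
# _parse_create() calls _parse_properties() at every slot where a dialect may
# attach options (the exp.Properties.Location.POST_CREATE, POST_NAME,
# POST_SCHEMA, POST_ALIAS, POST_EXPRESSION and POST_INDEX markers in this
# method), and extend_props() below merges each batch into a single
# exp.Properties node. A hedged sketch of the observable result:
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> create = sqlglot.parse_one("CREATE TEMPORARY TABLE t (c INT)")
#     >>> create.find(exp.TemporaryProperty) is not None
#     True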
extend_props(self._parse_properties()) 1927 1928 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1929 extend_props(self._parse_properties()) 1930 1931 if not expression: 1932 if self._match(TokenType.COMMAND): 1933 expression = self._parse_as_command(self._prev) 1934 else: 1935 begin = self._match(TokenType.BEGIN) 1936 return_ = self._match_text_seq("RETURN") 1937 1938 if self._match(TokenType.STRING, advance=False): 1939 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1940 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1941 expression = self._parse_string() 1942 extend_props(self._parse_properties()) 1943 else: 1944 expression = self._parse_user_defined_function_expression() 1945 1946 end = self._match_text_seq("END") 1947 1948 if return_: 1949 expression = self.expression(exp.Return, this=expression) 1950 elif create_token.token_type == TokenType.INDEX: 1951 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1952 if not self._match(TokenType.ON): 1953 index = self._parse_id_var() 1954 anonymous = False 1955 else: 1956 index = None 1957 anonymous = True 1958 1959 this = self._parse_index(index=index, anonymous=anonymous) 1960 elif create_token.token_type in self.DB_CREATABLES: 1961 table_parts = self._parse_table_parts( 1962 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1963 ) 1964 1965 # exp.Properties.Location.POST_NAME 1966 self._match(TokenType.COMMA) 1967 extend_props(self._parse_properties(before=True)) 1968 1969 this = self._parse_schema(this=table_parts) 1970 1971 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1972 extend_props(self._parse_properties()) 1973 1974 self._match(TokenType.ALIAS) 1975 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1976 # exp.Properties.Location.POST_ALIAS 1977 extend_props(self._parse_properties()) 1978 1979 if create_token.token_type == TokenType.SEQUENCE: 1980 expression = self._parse_types() 1981 extend_props(self._parse_properties()) 1982 else: 1983 expression = self._parse_ddl_select() 1984 1985 if create_token.token_type == TokenType.TABLE: 1986 # exp.Properties.Location.POST_EXPRESSION 1987 extend_props(self._parse_properties()) 1988 1989 indexes = [] 1990 while True: 1991 index = self._parse_index() 1992 1993 # exp.Properties.Location.POST_INDEX 1994 extend_props(self._parse_properties()) 1995 if not index: 1996 break 1997 else: 1998 self._match(TokenType.COMMA) 1999 indexes.append(index) 2000 elif create_token.token_type == TokenType.VIEW: 2001 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2002 no_schema_binding = True 2003 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2004 extend_props(self._parse_properties()) 2005 2006 shallow = self._match_text_seq("SHALLOW") 2007 2008 if self._match_texts(self.CLONE_KEYWORDS): 2009 copy = self._prev.text.lower() == "copy" 2010 clone = self.expression( 2011 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2012 ) 2013 2014 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2015 return self._parse_as_command(start) 2016 2017 create_kind_text = create_token.text.upper() 2018 return self.expression( 2019 exp.Create, 2020 this=this, 2021 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2022 replace=replace, 2023 refresh=refresh, 2024 unique=unique, 2025 expression=expression, 
2026 exists=exists, 2027 properties=properties, 2028 indexes=indexes, 2029 no_schema_binding=no_schema_binding, 2030 begin=begin, 2031 end=end, 2032 clone=clone, 2033 concurrently=concurrently, 2034 clustered=clustered, 2035 ) 2036 2037 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2038 seq = exp.SequenceProperties() 2039 2040 options = [] 2041 index = self._index 2042 2043 while self._curr: 2044 self._match(TokenType.COMMA) 2045 if self._match_text_seq("INCREMENT"): 2046 self._match_text_seq("BY") 2047 self._match_text_seq("=") 2048 seq.set("increment", self._parse_term()) 2049 elif self._match_text_seq("MINVALUE"): 2050 seq.set("minvalue", self._parse_term()) 2051 elif self._match_text_seq("MAXVALUE"): 2052 seq.set("maxvalue", self._parse_term()) 2053 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2054 self._match_text_seq("=") 2055 seq.set("start", self._parse_term()) 2056 elif self._match_text_seq("CACHE"): 2057 # T-SQL allows empty CACHE which is initialized dynamically 2058 seq.set("cache", self._parse_number() or True) 2059 elif self._match_text_seq("OWNED", "BY"): 2060 # "OWNED BY NONE" is the default 2061 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2062 else: 2063 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2064 if opt: 2065 options.append(opt) 2066 else: 2067 break 2068 2069 seq.set("options", options if options else None) 2070 return None if self._index == index else seq 2071 2072 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2073 # only used for teradata currently 2074 self._match(TokenType.COMMA) 2075 2076 kwargs = { 2077 "no": self._match_text_seq("NO"), 2078 "dual": self._match_text_seq("DUAL"), 2079 "before": self._match_text_seq("BEFORE"), 2080 "default": self._match_text_seq("DEFAULT"), 2081 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2082 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2083 "after": self._match_text_seq("AFTER"), 2084 "minimum": self._match_texts(("MIN", "MINIMUM")), 2085 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2086 } 2087 2088 if self._match_texts(self.PROPERTY_PARSERS): 2089 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2090 try: 2091 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2092 except TypeError: 2093 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2094 2095 return None 2096 2097 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2098 return self._parse_wrapped_csv(self._parse_property) 2099 2100 def _parse_property(self) -> t.Optional[exp.Expression]: 2101 if self._match_texts(self.PROPERTY_PARSERS): 2102 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2103 2104 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2105 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2106 2107 if self._match_text_seq("COMPOUND", "SORTKEY"): 2108 return self._parse_sortkey(compound=True) 2109 2110 if self._match_text_seq("SQL", "SECURITY"): 2111 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2112 2113 index = self._index 2114 key = self._parse_column() 2115 2116 if not self._match(TokenType.EQ): 2117 self._retreat(index) 2118 return self._parse_sequence_properties() 2119 2120 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2121 if isinstance(key, exp.Column): 2122 key = 
key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2123 2124 value = self._parse_bitwise() or self._parse_var(any_token=True) 2125 2126 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2127 if isinstance(value, exp.Column): 2128 value = exp.var(value.name) 2129 2130 return self.expression(exp.Property, this=key, value=value) 2131 2132 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2133 if self._match_text_seq("BY"): 2134 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2135 2136 self._match(TokenType.ALIAS) 2137 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2138 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2139 2140 return self.expression( 2141 exp.FileFormatProperty, 2142 this=( 2143 self.expression( 2144 exp.InputOutputFormat, 2145 input_format=input_format, 2146 output_format=output_format, 2147 ) 2148 if input_format or output_format 2149 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2150 ), 2151 ) 2152 2153 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2154 field = self._parse_field() 2155 if isinstance(field, exp.Identifier) and not field.quoted: 2156 field = exp.var(field) 2157 2158 return field 2159 2160 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2161 self._match(TokenType.EQ) 2162 self._match(TokenType.ALIAS) 2163 2164 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2165 2166 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2167 properties = [] 2168 while True: 2169 if before: 2170 prop = self._parse_property_before() 2171 else: 2172 prop = self._parse_property() 2173 if not prop: 2174 break 2175 for p in ensure_list(prop): 2176 properties.append(p) 2177 2178 if properties: 2179 return self.expression(exp.Properties, expressions=properties) 2180 2181 return None 2182 2183 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2184 return self.expression( 2185 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2186 ) 2187 2188 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2189 if self._match_texts(("DEFINER", "INVOKER")): 2190 security_specifier = self._prev.text.upper() 2191 return self.expression(exp.SecurityProperty, this=security_specifier) 2192 return None 2193 2194 def _parse_settings_property(self) -> exp.SettingsProperty: 2195 return self.expression( 2196 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2197 ) 2198 2199 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2200 if self._index >= 2: 2201 pre_volatile_token = self._tokens[self._index - 2] 2202 else: 2203 pre_volatile_token = None 2204 2205 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2206 return exp.VolatileProperty() 2207 2208 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2209 2210 def _parse_retention_period(self) -> exp.Var: 2211 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2212 number = self._parse_number() 2213 number_str = f"{number} " if number else "" 2214 unit = self._parse_var(any_token=True) 2215 return exp.var(f"{number_str}{unit}") 2216 2217 def _parse_system_versioning_property( 2218 self, with_: bool = False 2219 ) 
-> exp.WithSystemVersioningProperty: 2220 self._match(TokenType.EQ) 2221 prop = self.expression( 2222 exp.WithSystemVersioningProperty, 2223 **{ # type: ignore 2224 "on": True, 2225 "with": with_, 2226 }, 2227 ) 2228 2229 if self._match_text_seq("OFF"): 2230 prop.set("on", False) 2231 return prop 2232 2233 self._match(TokenType.ON) 2234 if self._match(TokenType.L_PAREN): 2235 while self._curr and not self._match(TokenType.R_PAREN): 2236 if self._match_text_seq("HISTORY_TABLE", "="): 2237 prop.set("this", self._parse_table_parts()) 2238 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2239 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2240 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2241 prop.set("retention_period", self._parse_retention_period()) 2242 2243 self._match(TokenType.COMMA) 2244 2245 return prop 2246 2247 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2248 self._match(TokenType.EQ) 2249 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2250 prop = self.expression(exp.DataDeletionProperty, on=on) 2251 2252 if self._match(TokenType.L_PAREN): 2253 while self._curr and not self._match(TokenType.R_PAREN): 2254 if self._match_text_seq("FILTER_COLUMN", "="): 2255 prop.set("filter_column", self._parse_column()) 2256 elif self._match_text_seq("RETENTION_PERIOD", "="): 2257 prop.set("retention_period", self._parse_retention_period()) 2258 2259 self._match(TokenType.COMMA) 2260 2261 return prop 2262 2263 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2264 kind = "HASH" 2265 expressions: t.Optional[t.List[exp.Expression]] = None 2266 if self._match_text_seq("BY", "HASH"): 2267 expressions = self._parse_wrapped_csv(self._parse_id_var) 2268 elif self._match_text_seq("BY", "RANDOM"): 2269 kind = "RANDOM" 2270 2271 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2272 buckets: t.Optional[exp.Expression] = None 2273 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2274 buckets = self._parse_number() 2275 2276 return self.expression( 2277 exp.DistributedByProperty, 2278 expressions=expressions, 2279 kind=kind, 2280 buckets=buckets, 2281 order=self._parse_order(), 2282 ) 2283 2284 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2285 self._match_text_seq("KEY") 2286 expressions = self._parse_wrapped_id_vars() 2287 return self.expression(expr_type, expressions=expressions) 2288 2289 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2290 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2291 prop = self._parse_system_versioning_property(with_=True) 2292 self._match_r_paren() 2293 return prop 2294 2295 if self._match(TokenType.L_PAREN, advance=False): 2296 return self._parse_wrapped_properties() 2297 2298 if self._match_text_seq("JOURNAL"): 2299 return self._parse_withjournaltable() 2300 2301 if self._match_texts(self.VIEW_ATTRIBUTES): 2302 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2303 2304 if self._match_text_seq("DATA"): 2305 return self._parse_withdata(no=False) 2306 elif self._match_text_seq("NO", "DATA"): 2307 return self._parse_withdata(no=True) 2308 2309 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2310 return self._parse_serde_properties(with_=True) 2311 2312 if self._match(TokenType.SCHEMA): 2313 return self.expression( 2314 exp.WithSchemaBindingProperty, 2315 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 
2316 ) 2317 2318 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2319 return self.expression( 2320 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2321 ) 2322 2323 if not self._next: 2324 return None 2325 2326 return self._parse_withisolatedloading() 2327 2328 def _parse_procedure_option(self) -> exp.Expression | None: 2329 if self._match_text_seq("EXECUTE", "AS"): 2330 return self.expression( 2331 exp.ExecuteAsProperty, 2332 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2333 or self._parse_string(), 2334 ) 2335 2336 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2337 2338 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2339 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2340 self._match(TokenType.EQ) 2341 2342 user = self._parse_id_var() 2343 self._match(TokenType.PARAMETER) 2344 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2345 2346 if not user or not host: 2347 return None 2348 2349 return exp.DefinerProperty(this=f"{user}@{host}") 2350 2351 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2352 self._match(TokenType.TABLE) 2353 self._match(TokenType.EQ) 2354 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2355 2356 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2357 return self.expression(exp.LogProperty, no=no) 2358 2359 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2360 return self.expression(exp.JournalProperty, **kwargs) 2361 2362 def _parse_checksum(self) -> exp.ChecksumProperty: 2363 self._match(TokenType.EQ) 2364 2365 on = None 2366 if self._match(TokenType.ON): 2367 on = True 2368 elif self._match_text_seq("OFF"): 2369 on = False 2370 2371 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2372 2373 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2374 return self.expression( 2375 exp.Cluster, 2376 expressions=( 2377 self._parse_wrapped_csv(self._parse_ordered) 2378 if wrapped 2379 else self._parse_csv(self._parse_ordered) 2380 ), 2381 ) 2382 2383 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2384 self._match_text_seq("BY") 2385 2386 self._match_l_paren() 2387 expressions = self._parse_csv(self._parse_column) 2388 self._match_r_paren() 2389 2390 if self._match_text_seq("SORTED", "BY"): 2391 self._match_l_paren() 2392 sorted_by = self._parse_csv(self._parse_ordered) 2393 self._match_r_paren() 2394 else: 2395 sorted_by = None 2396 2397 self._match(TokenType.INTO) 2398 buckets = self._parse_number() 2399 self._match_text_seq("BUCKETS") 2400 2401 return self.expression( 2402 exp.ClusteredByProperty, 2403 expressions=expressions, 2404 sorted_by=sorted_by, 2405 buckets=buckets, 2406 ) 2407 2408 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2409 if not self._match_text_seq("GRANTS"): 2410 self._retreat(self._index - 1) 2411 return None 2412 2413 return self.expression(exp.CopyGrantsProperty) 2414 2415 def _parse_freespace(self) -> exp.FreespaceProperty: 2416 self._match(TokenType.EQ) 2417 return self.expression( 2418 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2419 ) 2420 2421 def _parse_mergeblockratio( 2422 self, no: bool = False, default: bool = False 2423 ) -> exp.MergeBlockRatioProperty: 2424 if self._match(TokenType.EQ): 2425 return self.expression( 2426 exp.MergeBlockRatioProperty, 2427 
this=self._parse_number(), 2428 percent=self._match(TokenType.PERCENT), 2429 ) 2430 2431 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2432 2433 def _parse_datablocksize( 2434 self, 2435 default: t.Optional[bool] = None, 2436 minimum: t.Optional[bool] = None, 2437 maximum: t.Optional[bool] = None, 2438 ) -> exp.DataBlocksizeProperty: 2439 self._match(TokenType.EQ) 2440 size = self._parse_number() 2441 2442 units = None 2443 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2444 units = self._prev.text 2445 2446 return self.expression( 2447 exp.DataBlocksizeProperty, 2448 size=size, 2449 units=units, 2450 default=default, 2451 minimum=minimum, 2452 maximum=maximum, 2453 ) 2454 2455 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2456 self._match(TokenType.EQ) 2457 always = self._match_text_seq("ALWAYS") 2458 manual = self._match_text_seq("MANUAL") 2459 never = self._match_text_seq("NEVER") 2460 default = self._match_text_seq("DEFAULT") 2461 2462 autotemp = None 2463 if self._match_text_seq("AUTOTEMP"): 2464 autotemp = self._parse_schema() 2465 2466 return self.expression( 2467 exp.BlockCompressionProperty, 2468 always=always, 2469 manual=manual, 2470 never=never, 2471 default=default, 2472 autotemp=autotemp, 2473 ) 2474 2475 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2476 index = self._index 2477 no = self._match_text_seq("NO") 2478 concurrent = self._match_text_seq("CONCURRENT") 2479 2480 if not self._match_text_seq("ISOLATED", "LOADING"): 2481 self._retreat(index) 2482 return None 2483 2484 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2485 return self.expression( 2486 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2487 ) 2488 2489 def _parse_locking(self) -> exp.LockingProperty: 2490 if self._match(TokenType.TABLE): 2491 kind = "TABLE" 2492 elif self._match(TokenType.VIEW): 2493 kind = "VIEW" 2494 elif self._match(TokenType.ROW): 2495 kind = "ROW" 2496 elif self._match_text_seq("DATABASE"): 2497 kind = "DATABASE" 2498 else: 2499 kind = None 2500 2501 if kind in ("DATABASE", "TABLE", "VIEW"): 2502 this = self._parse_table_parts() 2503 else: 2504 this = None 2505 2506 if self._match(TokenType.FOR): 2507 for_or_in = "FOR" 2508 elif self._match(TokenType.IN): 2509 for_or_in = "IN" 2510 else: 2511 for_or_in = None 2512 2513 if self._match_text_seq("ACCESS"): 2514 lock_type = "ACCESS" 2515 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2516 lock_type = "EXCLUSIVE" 2517 elif self._match_text_seq("SHARE"): 2518 lock_type = "SHARE" 2519 elif self._match_text_seq("READ"): 2520 lock_type = "READ" 2521 elif self._match_text_seq("WRITE"): 2522 lock_type = "WRITE" 2523 elif self._match_text_seq("CHECKSUM"): 2524 lock_type = "CHECKSUM" 2525 else: 2526 lock_type = None 2527 2528 override = self._match_text_seq("OVERRIDE") 2529 2530 return self.expression( 2531 exp.LockingProperty, 2532 this=this, 2533 kind=kind, 2534 for_or_in=for_or_in, 2535 lock_type=lock_type, 2536 override=override, 2537 ) 2538 2539 def _parse_partition_by(self) -> t.List[exp.Expression]: 2540 if self._match(TokenType.PARTITION_BY): 2541 return self._parse_csv(self._parse_assignment) 2542 return [] 2543 2544 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2545 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2546 if self._match_text_seq("MINVALUE"): 2547 return exp.var("MINVALUE") 2548 if self._match_text_seq("MAXVALUE"): 2549 return 
exp.var("MAXVALUE") 2550 return self._parse_bitwise() 2551 2552 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2553 expression = None 2554 from_expressions = None 2555 to_expressions = None 2556 2557 if self._match(TokenType.IN): 2558 this = self._parse_wrapped_csv(self._parse_bitwise) 2559 elif self._match(TokenType.FROM): 2560 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2561 self._match_text_seq("TO") 2562 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2563 elif self._match_text_seq("WITH", "(", "MODULUS"): 2564 this = self._parse_number() 2565 self._match_text_seq(",", "REMAINDER") 2566 expression = self._parse_number() 2567 self._match_r_paren() 2568 else: 2569 self.raise_error("Failed to parse partition bound spec.") 2570 2571 return self.expression( 2572 exp.PartitionBoundSpec, 2573 this=this, 2574 expression=expression, 2575 from_expressions=from_expressions, 2576 to_expressions=to_expressions, 2577 ) 2578 2579 # https://www.postgresql.org/docs/current/sql-createtable.html 2580 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2581 if not self._match_text_seq("OF"): 2582 self._retreat(self._index - 1) 2583 return None 2584 2585 this = self._parse_table(schema=True) 2586 2587 if self._match(TokenType.DEFAULT): 2588 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2589 elif self._match_text_seq("FOR", "VALUES"): 2590 expression = self._parse_partition_bound_spec() 2591 else: 2592 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2593 2594 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2595 2596 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2597 self._match(TokenType.EQ) 2598 return self.expression( 2599 exp.PartitionedByProperty, 2600 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2601 ) 2602 2603 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2604 if self._match_text_seq("AND", "STATISTICS"): 2605 statistics = True 2606 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2607 statistics = False 2608 else: 2609 statistics = None 2610 2611 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2612 2613 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2614 if self._match_text_seq("SQL"): 2615 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2616 return None 2617 2618 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2619 if self._match_text_seq("SQL", "DATA"): 2620 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2621 return None 2622 2623 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2624 if self._match_text_seq("PRIMARY", "INDEX"): 2625 return exp.NoPrimaryIndexProperty() 2626 if self._match_text_seq("SQL"): 2627 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2628 return None 2629 2630 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2631 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2632 return exp.OnCommitProperty() 2633 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2634 return exp.OnCommitProperty(delete=True) 2635 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2636 2637 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2638 if self._match_text_seq("SQL", "DATA"): 2639 return self.expression(exp.SqlReadWriteProperty, 
this="READS SQL DATA") 2640 return None 2641 2642 def _parse_distkey(self) -> exp.DistKeyProperty: 2643 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2644 2645 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2646 table = self._parse_table(schema=True) 2647 2648 options = [] 2649 while self._match_texts(("INCLUDING", "EXCLUDING")): 2650 this = self._prev.text.upper() 2651 2652 id_var = self._parse_id_var() 2653 if not id_var: 2654 return None 2655 2656 options.append( 2657 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2658 ) 2659 2660 return self.expression(exp.LikeProperty, this=table, expressions=options) 2661 2662 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2663 return self.expression( 2664 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2665 ) 2666 2667 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2668 self._match(TokenType.EQ) 2669 return self.expression( 2670 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2671 ) 2672 2673 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2674 self._match_text_seq("WITH", "CONNECTION") 2675 return self.expression( 2676 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2677 ) 2678 2679 def _parse_returns(self) -> exp.ReturnsProperty: 2680 value: t.Optional[exp.Expression] 2681 null = None 2682 is_table = self._match(TokenType.TABLE) 2683 2684 if is_table: 2685 if self._match(TokenType.LT): 2686 value = self.expression( 2687 exp.Schema, 2688 this="TABLE", 2689 expressions=self._parse_csv(self._parse_struct_types), 2690 ) 2691 if not self._match(TokenType.GT): 2692 self.raise_error("Expecting >") 2693 else: 2694 value = self._parse_schema(exp.var("TABLE")) 2695 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2696 null = True 2697 value = None 2698 else: 2699 value = self._parse_types() 2700 2701 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2702 2703 def _parse_describe(self) -> exp.Describe: 2704 kind = self._match_set(self.CREATABLES) and self._prev.text 2705 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2706 if self._match(TokenType.DOT): 2707 style = None 2708 self._retreat(self._index - 2) 2709 2710 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2711 2712 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2713 this = self._parse_statement() 2714 else: 2715 this = self._parse_table(schema=True) 2716 2717 properties = self._parse_properties() 2718 expressions = properties.expressions if properties else None 2719 partition = self._parse_partition() 2720 return self.expression( 2721 exp.Describe, 2722 this=this, 2723 style=style, 2724 kind=kind, 2725 expressions=expressions, 2726 partition=partition, 2727 format=format, 2728 ) 2729 2730 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2731 kind = self._prev.text.upper() 2732 expressions = [] 2733 2734 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2735 if self._match(TokenType.WHEN): 2736 expression = self._parse_disjunction() 2737 self._match(TokenType.THEN) 2738 else: 2739 expression = None 2740 2741 else_ = self._match(TokenType.ELSE) 2742 2743 if not self._match(TokenType.INTO): 2744 return None 2745 2746 return self.expression( 2747 
exp.ConditionalInsert, 2748 this=self.expression( 2749 exp.Insert, 2750 this=self._parse_table(schema=True), 2751 expression=self._parse_derived_table_values(), 2752 ), 2753 expression=expression, 2754 else_=else_, 2755 ) 2756 2757 expression = parse_conditional_insert() 2758 while expression is not None: 2759 expressions.append(expression) 2760 expression = parse_conditional_insert() 2761 2762 return self.expression( 2763 exp.MultitableInserts, 2764 kind=kind, 2765 comments=comments, 2766 expressions=expressions, 2767 source=self._parse_table(), 2768 ) 2769 2770 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2771 comments = [] 2772 hint = self._parse_hint() 2773 overwrite = self._match(TokenType.OVERWRITE) 2774 ignore = self._match(TokenType.IGNORE) 2775 local = self._match_text_seq("LOCAL") 2776 alternative = None 2777 is_function = None 2778 2779 if self._match_text_seq("DIRECTORY"): 2780 this: t.Optional[exp.Expression] = self.expression( 2781 exp.Directory, 2782 this=self._parse_var_or_string(), 2783 local=local, 2784 row_format=self._parse_row_format(match_row=True), 2785 ) 2786 else: 2787 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2788 comments += ensure_list(self._prev_comments) 2789 return self._parse_multitable_inserts(comments) 2790 2791 if self._match(TokenType.OR): 2792 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2793 2794 self._match(TokenType.INTO) 2795 comments += ensure_list(self._prev_comments) 2796 self._match(TokenType.TABLE) 2797 is_function = self._match(TokenType.FUNCTION) 2798 2799 this = ( 2800 self._parse_table(schema=True, parse_partition=True) 2801 if not is_function 2802 else self._parse_function() 2803 ) 2804 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2805 this.set("alias", self._parse_table_alias()) 2806 2807 returning = self._parse_returning() 2808 2809 return self.expression( 2810 exp.Insert, 2811 comments=comments, 2812 hint=hint, 2813 is_function=is_function, 2814 this=this, 2815 stored=self._match_text_seq("STORED") and self._parse_stored(), 2816 by_name=self._match_text_seq("BY", "NAME"), 2817 exists=self._parse_exists(), 2818 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2819 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2820 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2821 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2822 conflict=self._parse_on_conflict(), 2823 returning=returning or self._parse_returning(), 2824 overwrite=overwrite, 2825 alternative=alternative, 2826 ignore=ignore, 2827 source=self._match(TokenType.TABLE) and self._parse_table(), 2828 ) 2829 2830 def _parse_kill(self) -> exp.Kill: 2831 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2832 2833 return self.expression( 2834 exp.Kill, 2835 this=self._parse_primary(), 2836 kind=kind, 2837 ) 2838 2839 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2840 conflict = self._match_text_seq("ON", "CONFLICT") 2841 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2842 2843 if not conflict and not duplicate: 2844 return None 2845 2846 conflict_keys = None 2847 constraint = None 2848 2849 if conflict: 2850 if self._match_text_seq("ON", "CONSTRAINT"): 2851 constraint = self._parse_id_var() 2852 elif self._match(TokenType.L_PAREN): 2853 conflict_keys = self._parse_csv(self._parse_id_var) 2854 
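# --- Illustrative example (added; not part of the parser source) -----------
# parse_conditional_insert above drives Oracle-style multi-table inserts; each
# INTO branch becomes an exp.ConditionalInsert. A sketch, assuming the
# "oracle" dialect; the table names are made up.
import sqlglot
from sqlglot import exp

sql = "INSERT ALL INTO t1 (c) VALUES (x) INTO t2 (c) VALUES (x) SELECT x FROM src"
multi = sqlglot.parse_one(sql, read="oracle")
assert isinstance(multi, exp.MultitableInserts)
# ---------------------------------------------------------------------------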
self._match_r_paren() 2855 2856 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2857 if self._prev.token_type == TokenType.UPDATE: 2858 self._match(TokenType.SET) 2859 expressions = self._parse_csv(self._parse_equality) 2860 else: 2861 expressions = None 2862 2863 return self.expression( 2864 exp.OnConflict, 2865 duplicate=duplicate, 2866 expressions=expressions, 2867 action=action, 2868 conflict_keys=conflict_keys, 2869 constraint=constraint, 2870 where=self._parse_where(), 2871 ) 2872 2873 def _parse_returning(self) -> t.Optional[exp.Returning]: 2874 if not self._match(TokenType.RETURNING): 2875 return None 2876 return self.expression( 2877 exp.Returning, 2878 expressions=self._parse_csv(self._parse_expression), 2879 into=self._match(TokenType.INTO) and self._parse_table_part(), 2880 ) 2881 2882 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2883 if not self._match(TokenType.FORMAT): 2884 return None 2885 return self._parse_row_format() 2886 2887 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2888 index = self._index 2889 with_ = with_ or self._match_text_seq("WITH") 2890 2891 if not self._match(TokenType.SERDE_PROPERTIES): 2892 self._retreat(index) 2893 return None 2894 return self.expression( 2895 exp.SerdeProperties, 2896 **{ # type: ignore 2897 "expressions": self._parse_wrapped_properties(), 2898 "with": with_, 2899 }, 2900 ) 2901 2902 def _parse_row_format( 2903 self, match_row: bool = False 2904 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2905 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2906 return None 2907 2908 if self._match_text_seq("SERDE"): 2909 this = self._parse_string() 2910 2911 serde_properties = self._parse_serde_properties() 2912 2913 return self.expression( 2914 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2915 ) 2916 2917 self._match_text_seq("DELIMITED") 2918 2919 kwargs = {} 2920 2921 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2922 kwargs["fields"] = self._parse_string() 2923 if self._match_text_seq("ESCAPED", "BY"): 2924 kwargs["escaped"] = self._parse_string() 2925 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2926 kwargs["collection_items"] = self._parse_string() 2927 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2928 kwargs["map_keys"] = self._parse_string() 2929 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2930 kwargs["lines"] = self._parse_string() 2931 if self._match_text_seq("NULL", "DEFINED", "AS"): 2932 kwargs["null"] = self._parse_string() 2933 2934 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2935 2936 def _parse_load(self) -> exp.LoadData | exp.Command: 2937 if self._match_text_seq("DATA"): 2938 local = self._match_text_seq("LOCAL") 2939 self._match_text_seq("INPATH") 2940 inpath = self._parse_string() 2941 overwrite = self._match(TokenType.OVERWRITE) 2942 self._match_pair(TokenType.INTO, TokenType.TABLE) 2943 2944 return self.expression( 2945 exp.LoadData, 2946 this=self._parse_table(schema=True), 2947 local=local, 2948 overwrite=overwrite, 2949 inpath=inpath, 2950 partition=self._parse_partition(), 2951 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2952 serde=self._match_text_seq("SERDE") and self._parse_string(), 2953 ) 2954 return self._parse_as_command(self._prev) 2955 2956 def _parse_delete(self) -> exp.Delete: 2957 # This handles 
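# --- Illustrative example (added; not part of the parser source) -----------
# _parse_on_conflict above normalizes Postgres ON CONFLICT and MySQL
# ON DUPLICATE KEY UPDATE into a single exp.OnConflict node. A minimal sketch:
import sqlglot

sql = "INSERT INTO t (id) VALUES (1) ON CONFLICT (id) DO UPDATE SET id = 2"
conflict = sqlglot.parse_one(sql, read="postgres").args["conflict"]
# conflict.args["conflict_keys"] -> [id]; the SET assignments land in "expressions"
# ---------------------------------------------------------------------------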
MySQL's "Multiple-Table Syntax" 2958 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2959 tables = None 2960 if not self._match(TokenType.FROM, advance=False): 2961 tables = self._parse_csv(self._parse_table) or None 2962 2963 returning = self._parse_returning() 2964 2965 return self.expression( 2966 exp.Delete, 2967 tables=tables, 2968 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2969 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2970 cluster=self._match(TokenType.ON) and self._parse_on_property(), 2971 where=self._parse_where(), 2972 returning=returning or self._parse_returning(), 2973 limit=self._parse_limit(), 2974 ) 2975 2976 def _parse_update(self) -> exp.Update: 2977 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2978 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2979 returning = self._parse_returning() 2980 return self.expression( 2981 exp.Update, 2982 **{ # type: ignore 2983 "this": this, 2984 "expressions": expressions, 2985 "from": self._parse_from(joins=True), 2986 "where": self._parse_where(), 2987 "returning": returning or self._parse_returning(), 2988 "order": self._parse_order(), 2989 "limit": self._parse_limit(), 2990 }, 2991 ) 2992 2993 def _parse_use(self) -> exp.Use: 2994 return self.expression( 2995 exp.Use, 2996 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 2997 this=self._parse_table(schema=False), 2998 ) 2999 3000 def _parse_uncache(self) -> exp.Uncache: 3001 if not self._match(TokenType.TABLE): 3002 self.raise_error("Expecting TABLE after UNCACHE") 3003 3004 return self.expression( 3005 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3006 ) 3007 3008 def _parse_cache(self) -> exp.Cache: 3009 lazy = self._match_text_seq("LAZY") 3010 self._match(TokenType.TABLE) 3011 table = self._parse_table(schema=True) 3012 3013 options = [] 3014 if self._match_text_seq("OPTIONS"): 3015 self._match_l_paren() 3016 k = self._parse_string() 3017 self._match(TokenType.EQ) 3018 v = self._parse_string() 3019 options = [k, v] 3020 self._match_r_paren() 3021 3022 self._match(TokenType.ALIAS) 3023 return self.expression( 3024 exp.Cache, 3025 this=table, 3026 lazy=lazy, 3027 options=options, 3028 expression=self._parse_select(nested=True), 3029 ) 3030 3031 def _parse_partition(self) -> t.Optional[exp.Partition]: 3032 if not self._match_texts(self.PARTITION_KEYWORDS): 3033 return None 3034 3035 return self.expression( 3036 exp.Partition, 3037 subpartition=self._prev.text.upper() == "SUBPARTITION", 3038 expressions=self._parse_wrapped_csv(self._parse_assignment), 3039 ) 3040 3041 def _parse_value(self) -> t.Optional[exp.Tuple]: 3042 def _parse_value_expression() -> t.Optional[exp.Expression]: 3043 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3044 return exp.var(self._prev.text.upper()) 3045 return self._parse_expression() 3046 3047 if self._match(TokenType.L_PAREN): 3048 expressions = self._parse_csv(_parse_value_expression) 3049 self._match_r_paren() 3050 return self.expression(exp.Tuple, expressions=expressions) 3051 3052 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
3053 expression = self._parse_expression() 3054 if expression: 3055 return self.expression(exp.Tuple, expressions=[expression]) 3056 return None 3057 3058 def _parse_projections(self) -> t.List[exp.Expression]: 3059 return self._parse_expressions() 3060 3061 def _parse_select( 3062 self, 3063 nested: bool = False, 3064 table: bool = False, 3065 parse_subquery_alias: bool = True, 3066 parse_set_operation: bool = True, 3067 ) -> t.Optional[exp.Expression]: 3068 cte = self._parse_with() 3069 3070 if cte: 3071 this = self._parse_statement() 3072 3073 if not this: 3074 self.raise_error("Failed to parse any statement following CTE") 3075 return cte 3076 3077 if "with" in this.arg_types: 3078 this.set("with", cte) 3079 else: 3080 self.raise_error(f"{this.key} does not support CTE") 3081 this = cte 3082 3083 return this 3084 3085 # duckdb supports leading with FROM x 3086 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 3087 3088 if self._match(TokenType.SELECT): 3089 comments = self._prev_comments 3090 3091 hint = self._parse_hint() 3092 3093 if self._next and not self._next.token_type == TokenType.DOT: 3094 all_ = self._match(TokenType.ALL) 3095 distinct = self._match_set(self.DISTINCT_TOKENS) 3096 else: 3097 all_, distinct = None, None 3098 3099 kind = ( 3100 self._match(TokenType.ALIAS) 3101 and self._match_texts(("STRUCT", "VALUE")) 3102 and self._prev.text.upper() 3103 ) 3104 3105 if distinct: 3106 distinct = self.expression( 3107 exp.Distinct, 3108 on=self._parse_value() if self._match(TokenType.ON) else None, 3109 ) 3110 3111 if all_ and distinct: 3112 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3113 3114 operation_modifiers = [] 3115 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3116 operation_modifiers.append(exp.var(self._prev.text.upper())) 3117 3118 limit = self._parse_limit(top=True) 3119 projections = self._parse_projections() 3120 3121 this = self.expression( 3122 exp.Select, 3123 kind=kind, 3124 hint=hint, 3125 distinct=distinct, 3126 expressions=projections, 3127 limit=limit, 3128 operation_modifiers=operation_modifiers or None, 3129 ) 3130 this.comments = comments 3131 3132 into = self._parse_into() 3133 if into: 3134 this.set("into", into) 3135 3136 if not from_: 3137 from_ = self._parse_from() 3138 3139 if from_: 3140 this.set("from", from_) 3141 3142 this = self._parse_query_modifiers(this) 3143 elif (table or nested) and self._match(TokenType.L_PAREN): 3144 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3145 this = self._parse_simplified_pivot( 3146 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3147 ) 3148 elif self._match(TokenType.FROM): 3149 from_ = self._parse_from(skip_from_token=True) 3150 # Support parentheses for duckdb FROM-first syntax 3151 select = self._parse_select() 3152 if select: 3153 select.set("from", from_) 3154 this = select 3155 else: 3156 this = exp.select("*").from_(t.cast(exp.From, from_)) 3157 else: 3158 this = ( 3159 self._parse_table() 3160 if table 3161 else self._parse_select(nested=True, parse_set_operation=False) 3162 ) 3163 3164 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3165 # in case a modifier (e.g. 
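# --- Illustrative example (added; not part of the parser source) -----------
# The FROM-first branches above let DuckDB queries start with FROM.
# A sketch, assuming the "duckdb" dialect:
import sqlglot

q = sqlglot.parse_one("FROM t SELECT a", read="duckdb")
# Equivalent to SELECT a FROM t; a bare "FROM t" becomes SELECT * FROM t
# ---------------------------------------------------------------------------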
join) is following 3166 if table and isinstance(this, exp.Values) and this.alias: 3167 alias = this.args["alias"].pop() 3168 this = exp.Table(this=this, alias=alias) 3169 3170 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3171 3172 self._match_r_paren() 3173 3174 # We return early here so that the UNION isn't attached to the subquery by the 3175 # following call to _parse_set_operations, but instead becomes the parent node 3176 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3177 elif self._match(TokenType.VALUES, advance=False): 3178 this = self._parse_derived_table_values() 3179 elif from_: 3180 this = exp.select("*").from_(from_.this, copy=False) 3181 elif self._match(TokenType.SUMMARIZE): 3182 table = self._match(TokenType.TABLE) 3183 this = self._parse_select() or self._parse_string() or self._parse_table() 3184 return self.expression(exp.Summarize, this=this, table=table) 3185 elif self._match(TokenType.DESCRIBE): 3186 this = self._parse_describe() 3187 elif self._match_text_seq("STREAM"): 3188 this = self._parse_function() 3189 if this: 3190 this = self.expression(exp.Stream, this=this) 3191 else: 3192 self._retreat(self._index - 1) 3193 else: 3194 this = None 3195 3196 return self._parse_set_operations(this) if parse_set_operation else this 3197 3198 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3199 self._match_text_seq("SEARCH") 3200 3201 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3202 3203 if not kind: 3204 return None 3205 3206 self._match_text_seq("FIRST", "BY") 3207 3208 return self.expression( 3209 exp.RecursiveWithSearch, 3210 kind=kind, 3211 this=self._parse_id_var(), 3212 expression=self._match_text_seq("SET") and self._parse_id_var(), 3213 using=self._match_text_seq("USING") and self._parse_id_var(), 3214 ) 3215 3216 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3217 if not skip_with_token and not self._match(TokenType.WITH): 3218 return None 3219 3220 comments = self._prev_comments 3221 recursive = self._match(TokenType.RECURSIVE) 3222 3223 last_comments = None 3224 expressions = [] 3225 while True: 3226 cte = self._parse_cte() 3227 if isinstance(cte, exp.CTE): 3228 expressions.append(cte) 3229 if last_comments: 3230 cte.add_comments(last_comments) 3231 3232 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3233 break 3234 else: 3235 self._match(TokenType.WITH) 3236 3237 last_comments = self._prev_comments 3238 3239 return self.expression( 3240 exp.With, 3241 comments=comments, 3242 expressions=expressions, 3243 recursive=recursive, 3244 search=self._parse_recursive_with_search(), 3245 ) 3246 3247 def _parse_cte(self) -> t.Optional[exp.CTE]: 3248 index = self._index 3249 3250 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3251 if not alias or not alias.this: 3252 self.raise_error("Expected CTE to have alias") 3253 3254 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3255 self._retreat(index) 3256 return None 3257 3258 comments = self._prev_comments 3259 3260 if self._match_text_seq("NOT", "MATERIALIZED"): 3261 materialized = False 3262 elif self._match_text_seq("MATERIALIZED"): 3263 materialized = True 3264 else: 3265 materialized = None 3266 3267 cte = self.expression( 3268 exp.CTE, 3269 this=self._parse_wrapped(self._parse_statement), 3270 alias=alias, 3271 materialized=materialized, 3272 comments=comments, 3273 ) 3274 3275 if isinstance(cte.this, exp.Values): 3276 
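# --- Illustrative example (added; not part of the parser source) -----------
# _parse_with/_parse_cte above collect CTEs into an exp.With that is attached
# to the statement that follows. A minimal sketch:
import sqlglot

select = sqlglot.parse_one("WITH x AS (SELECT 1 AS a) SELECT a FROM x")
cte = select.args["with"].expressions[0]
assert cte.alias == "x"  # MATERIALIZED/NOT MATERIALIZED set cte.args["materialized"]
# ---------------------------------------------------------------------------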
cte.set("this", exp.select("*").from_(exp.alias_(cte.this, "_values", table=True))) 3277 3278 return cte 3279 3280 def _parse_table_alias( 3281 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3282 ) -> t.Optional[exp.TableAlias]: 3283 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3284 # so this section tries to parse the clause version and if it fails, it treats the token 3285 # as an identifier (alias) 3286 if self._can_parse_limit_or_offset(): 3287 return None 3288 3289 any_token = self._match(TokenType.ALIAS) 3290 alias = ( 3291 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3292 or self._parse_string_as_identifier() 3293 ) 3294 3295 index = self._index 3296 if self._match(TokenType.L_PAREN): 3297 columns = self._parse_csv(self._parse_function_parameter) 3298 self._match_r_paren() if columns else self._retreat(index) 3299 else: 3300 columns = None 3301 3302 if not alias and not columns: 3303 return None 3304 3305 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3306 3307 # We bubble up comments from the Identifier to the TableAlias 3308 if isinstance(alias, exp.Identifier): 3309 table_alias.add_comments(alias.pop_comments()) 3310 3311 return table_alias 3312 3313 def _parse_subquery( 3314 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3315 ) -> t.Optional[exp.Subquery]: 3316 if not this: 3317 return None 3318 3319 return self.expression( 3320 exp.Subquery, 3321 this=this, 3322 pivots=self._parse_pivots(), 3323 alias=self._parse_table_alias() if parse_alias else None, 3324 sample=self._parse_table_sample(), 3325 ) 3326 3327 def _implicit_unnests_to_explicit(self, this: E) -> E: 3328 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3329 3330 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3331 for i, join in enumerate(this.args.get("joins") or []): 3332 table = join.this 3333 normalized_table = table.copy() 3334 normalized_table.meta["maybe_column"] = True 3335 normalized_table = _norm(normalized_table, dialect=self.dialect) 3336 3337 if isinstance(table, exp.Table) and not join.args.get("on"): 3338 if normalized_table.parts[0].name in refs: 3339 table_as_column = table.to_column() 3340 unnest = exp.Unnest(expressions=[table_as_column]) 3341 3342 # Table.to_column creates a parent Alias node that we want to convert to 3343 # a TableAlias and attach to the Unnest, so it matches the parser's output 3344 if isinstance(table.args.get("alias"), exp.TableAlias): 3345 table_as_column.replace(table_as_column.this) 3346 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3347 3348 table.replace(unnest) 3349 3350 refs.add(normalized_table.alias_or_name) 3351 3352 return this 3353 3354 def _parse_query_modifiers( 3355 self, this: t.Optional[exp.Expression] 3356 ) -> t.Optional[exp.Expression]: 3357 if isinstance(this, self.MODIFIABLES): 3358 for join in self._parse_joins(): 3359 this.append("joins", join) 3360 for lateral in iter(self._parse_lateral, None): 3361 this.append("laterals", lateral) 3362 3363 while True: 3364 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3365 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3366 key, expression = parser(self) 3367 3368 if expression: 3369 this.set(key, expression) 3370 if key == "limit": 3371 offset = expression.args.pop("offset", None) 3372 3373 if offset: 3374 offset = exp.Offset(expression=offset) 
3375 this.set("offset", offset) 3376 3377 limit_by_expressions = expression.expressions 3378 expression.set("expressions", None) 3379 offset.set("expressions", limit_by_expressions) 3380 continue 3381 break 3382 3383 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3384 this = self._implicit_unnests_to_explicit(this) 3385 3386 return this 3387 3388 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3389 start = self._curr 3390 while self._curr: 3391 self._advance() 3392 3393 end = self._tokens[self._index - 1] 3394 return exp.Hint(expressions=[self._find_sql(start, end)]) 3395 3396 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3397 return self._parse_function_call() 3398 3399 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3400 start_index = self._index 3401 should_fallback_to_string = False 3402 3403 hints = [] 3404 try: 3405 for hint in iter( 3406 lambda: self._parse_csv( 3407 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3408 ), 3409 [], 3410 ): 3411 hints.extend(hint) 3412 except ParseError: 3413 should_fallback_to_string = True 3414 3415 if should_fallback_to_string or self._curr: 3416 self._retreat(start_index) 3417 return self._parse_hint_fallback_to_string() 3418 3419 return self.expression(exp.Hint, expressions=hints) 3420 3421 def _parse_hint(self) -> t.Optional[exp.Hint]: 3422 if self._match(TokenType.HINT) and self._prev_comments: 3423 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3424 3425 return None 3426 3427 def _parse_into(self) -> t.Optional[exp.Into]: 3428 if not self._match(TokenType.INTO): 3429 return None 3430 3431 temp = self._match(TokenType.TEMPORARY) 3432 unlogged = self._match_text_seq("UNLOGGED") 3433 self._match(TokenType.TABLE) 3434 3435 return self.expression( 3436 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3437 ) 3438 3439 def _parse_from( 3440 self, joins: bool = False, skip_from_token: bool = False 3441 ) -> t.Optional[exp.From]: 3442 if not skip_from_token and not self._match(TokenType.FROM): 3443 return None 3444 3445 return self.expression( 3446 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3447 ) 3448 3449 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3450 return self.expression( 3451 exp.MatchRecognizeMeasure, 3452 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3453 this=self._parse_expression(), 3454 ) 3455 3456 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3457 if not self._match(TokenType.MATCH_RECOGNIZE): 3458 return None 3459 3460 self._match_l_paren() 3461 3462 partition = self._parse_partition_by() 3463 order = self._parse_order() 3464 3465 measures = ( 3466 self._parse_csv(self._parse_match_recognize_measure) 3467 if self._match_text_seq("MEASURES") 3468 else None 3469 ) 3470 3471 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3472 rows = exp.var("ONE ROW PER MATCH") 3473 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3474 text = "ALL ROWS PER MATCH" 3475 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3476 text += " SHOW EMPTY MATCHES" 3477 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3478 text += " OMIT EMPTY MATCHES" 3479 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3480 text += " WITH UNMATCHED ROWS" 3481 rows = exp.var(text) 3482 else: 3483 rows = None 3484 3485 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3486 text = 
"AFTER MATCH SKIP" 3487 if self._match_text_seq("PAST", "LAST", "ROW"): 3488 text += " PAST LAST ROW" 3489 elif self._match_text_seq("TO", "NEXT", "ROW"): 3490 text += " TO NEXT ROW" 3491 elif self._match_text_seq("TO", "FIRST"): 3492 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3493 elif self._match_text_seq("TO", "LAST"): 3494 text += f" TO LAST {self._advance_any().text}" # type: ignore 3495 after = exp.var(text) 3496 else: 3497 after = None 3498 3499 if self._match_text_seq("PATTERN"): 3500 self._match_l_paren() 3501 3502 if not self._curr: 3503 self.raise_error("Expecting )", self._curr) 3504 3505 paren = 1 3506 start = self._curr 3507 3508 while self._curr and paren > 0: 3509 if self._curr.token_type == TokenType.L_PAREN: 3510 paren += 1 3511 if self._curr.token_type == TokenType.R_PAREN: 3512 paren -= 1 3513 3514 end = self._prev 3515 self._advance() 3516 3517 if paren > 0: 3518 self.raise_error("Expecting )", self._curr) 3519 3520 pattern = exp.var(self._find_sql(start, end)) 3521 else: 3522 pattern = None 3523 3524 define = ( 3525 self._parse_csv(self._parse_name_as_expression) 3526 if self._match_text_seq("DEFINE") 3527 else None 3528 ) 3529 3530 self._match_r_paren() 3531 3532 return self.expression( 3533 exp.MatchRecognize, 3534 partition_by=partition, 3535 order=order, 3536 measures=measures, 3537 rows=rows, 3538 after=after, 3539 pattern=pattern, 3540 define=define, 3541 alias=self._parse_table_alias(), 3542 ) 3543 3544 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3545 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3546 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3547 cross_apply = False 3548 3549 if cross_apply is not None: 3550 this = self._parse_select(table=True) 3551 view = None 3552 outer = None 3553 elif self._match(TokenType.LATERAL): 3554 this = self._parse_select(table=True) 3555 view = self._match(TokenType.VIEW) 3556 outer = self._match(TokenType.OUTER) 3557 else: 3558 return None 3559 3560 if not this: 3561 this = ( 3562 self._parse_unnest() 3563 or self._parse_function() 3564 or self._parse_id_var(any_token=False) 3565 ) 3566 3567 while self._match(TokenType.DOT): 3568 this = exp.Dot( 3569 this=this, 3570 expression=self._parse_function() or self._parse_id_var(any_token=False), 3571 ) 3572 3573 if view: 3574 table = self._parse_id_var(any_token=False) 3575 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3576 table_alias: t.Optional[exp.TableAlias] = self.expression( 3577 exp.TableAlias, this=table, columns=columns 3578 ) 3579 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3580 # We move the alias from the lateral's child node to the lateral itself 3581 table_alias = this.args["alias"].pop() 3582 else: 3583 table_alias = self._parse_table_alias() 3584 3585 return self.expression( 3586 exp.Lateral, 3587 this=this, 3588 view=view, 3589 outer=outer, 3590 alias=table_alias, 3591 cross_apply=cross_apply, 3592 ) 3593 3594 def _parse_join_parts( 3595 self, 3596 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3597 return ( 3598 self._match_set(self.JOIN_METHODS) and self._prev, 3599 self._match_set(self.JOIN_SIDES) and self._prev, 3600 self._match_set(self.JOIN_KINDS) and self._prev, 3601 ) 3602 3603 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3604 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3605 this = self._parse_column() 3606 if isinstance(this, exp.Column): 3607 return 
this.this 3608 return this 3609 3610 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3611 3612 def _parse_join( 3613 self, skip_join_token: bool = False, parse_bracket: bool = False 3614 ) -> t.Optional[exp.Join]: 3615 if self._match(TokenType.COMMA): 3616 table = self._try_parse(self._parse_table) 3617 if table: 3618 return self.expression(exp.Join, this=table) 3619 return None 3620 3621 index = self._index 3622 method, side, kind = self._parse_join_parts() 3623 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3624 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3625 3626 if not skip_join_token and not join: 3627 self._retreat(index) 3628 kind = None 3629 method = None 3630 side = None 3631 3632 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3633 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3634 3635 if not skip_join_token and not join and not outer_apply and not cross_apply: 3636 return None 3637 3638 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3639 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3640 kwargs["expressions"] = self._parse_csv( 3641 lambda: self._parse_table(parse_bracket=parse_bracket) 3642 ) 3643 3644 if method: 3645 kwargs["method"] = method.text 3646 if side: 3647 kwargs["side"] = side.text 3648 if kind: 3649 kwargs["kind"] = kind.text 3650 if hint: 3651 kwargs["hint"] = hint 3652 3653 if self._match(TokenType.MATCH_CONDITION): 3654 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3655 3656 if self._match(TokenType.ON): 3657 kwargs["on"] = self._parse_assignment() 3658 elif self._match(TokenType.USING): 3659 kwargs["using"] = self._parse_using_identifiers() 3660 elif ( 3661 not (outer_apply or cross_apply) 3662 and not isinstance(kwargs["this"], exp.Unnest) 3663 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3664 ): 3665 index = self._index 3666 joins: t.Optional[list] = list(self._parse_joins()) 3667 3668 if joins and self._match(TokenType.ON): 3669 kwargs["on"] = self._parse_assignment() 3670 elif joins and self._match(TokenType.USING): 3671 kwargs["using"] = self._parse_using_identifiers() 3672 else: 3673 joins = None 3674 self._retreat(index) 3675 3676 kwargs["this"].set("joins", joins if joins else None) 3677 3678 comments = [c for token in (method, side, kind) if token for c in token.comments] 3679 return self.expression(exp.Join, comments=comments, **kwargs) 3680 3681 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3682 this = self._parse_assignment() 3683 3684 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3685 return this 3686 3687 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3688 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3689 3690 return this 3691 3692 def _parse_index_params(self) -> exp.IndexParameters: 3693 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3694 3695 if self._match(TokenType.L_PAREN, advance=False): 3696 columns = self._parse_wrapped_csv(self._parse_with_operator) 3697 else: 3698 columns = None 3699 3700 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3701 partition_by = self._parse_partition_by() 3702 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3703 tablespace = ( 3704 
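# --- Illustrative example (added; not part of the parser source) -----------
# _parse_join above records method/side/kind and unwraps USING columns into
# bare identifiers via _parse_using_identifiers. A minimal sketch:
import sqlglot

join = sqlglot.parse_one("SELECT * FROM a LEFT JOIN b USING (id)").args["joins"][0]
assert join.side == "LEFT"  # join.args["using"] -> [exp.Identifier(this="id")]
# ---------------------------------------------------------------------------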
self._parse_var(any_token=True) 3705 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3706 else None 3707 ) 3708 where = self._parse_where() 3709 3710 on = self._parse_field() if self._match(TokenType.ON) else None 3711 3712 return self.expression( 3713 exp.IndexParameters, 3714 using=using, 3715 columns=columns, 3716 include=include, 3717 partition_by=partition_by, 3718 where=where, 3719 with_storage=with_storage, 3720 tablespace=tablespace, 3721 on=on, 3722 ) 3723 3724 def _parse_index( 3725 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3726 ) -> t.Optional[exp.Index]: 3727 if index or anonymous: 3728 unique = None 3729 primary = None 3730 amp = None 3731 3732 self._match(TokenType.ON) 3733 self._match(TokenType.TABLE) # hive 3734 table = self._parse_table_parts(schema=True) 3735 else: 3736 unique = self._match(TokenType.UNIQUE) 3737 primary = self._match_text_seq("PRIMARY") 3738 amp = self._match_text_seq("AMP") 3739 3740 if not self._match(TokenType.INDEX): 3741 return None 3742 3743 index = self._parse_id_var() 3744 table = None 3745 3746 params = self._parse_index_params() 3747 3748 return self.expression( 3749 exp.Index, 3750 this=index, 3751 table=table, 3752 unique=unique, 3753 primary=primary, 3754 amp=amp, 3755 params=params, 3756 ) 3757 3758 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3759 hints: t.List[exp.Expression] = [] 3760 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3761 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3762 hints.append( 3763 self.expression( 3764 exp.WithTableHint, 3765 expressions=self._parse_csv( 3766 lambda: self._parse_function() or self._parse_var(any_token=True) 3767 ), 3768 ) 3769 ) 3770 self._match_r_paren() 3771 else: 3772 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3773 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3774 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3775 3776 self._match_set((TokenType.INDEX, TokenType.KEY)) 3777 if self._match(TokenType.FOR): 3778 hint.set("target", self._advance_any() and self._prev.text.upper()) 3779 3780 hint.set("expressions", self._parse_wrapped_id_vars()) 3781 hints.append(hint) 3782 3783 return hints or None 3784 3785 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3786 return ( 3787 (not schema and self._parse_function(optional_parens=False)) 3788 or self._parse_id_var(any_token=False) 3789 or self._parse_string_as_identifier() 3790 or self._parse_placeholder() 3791 ) 3792 3793 def _parse_table_parts( 3794 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3795 ) -> exp.Table: 3796 catalog = None 3797 db = None 3798 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3799 3800 while self._match(TokenType.DOT): 3801 if catalog: 3802 # This allows nesting the table in arbitrarily many dot expressions if needed 3803 table = self.expression( 3804 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3805 ) 3806 else: 3807 catalog = db 3808 db = table 3809 # "" used for tsql FROM a..b case 3810 table = self._parse_table_part(schema=schema) or "" 3811 3812 if ( 3813 wildcard 3814 and self._is_connected() 3815 and (isinstance(table, exp.Identifier) or not table) 3816 and self._match(TokenType.STAR) 3817 ): 3818 if isinstance(table, exp.Identifier): 3819 table.args["this"] += "*" 3820 else: 3821 table = exp.Identifier(this="*") 3822 3823 # We bubble up 
comments from the Identifier to the Table 3824 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3825 3826 if is_db_reference: 3827 catalog = db 3828 db = table 3829 table = None 3830 3831 if not table and not is_db_reference: 3832 self.raise_error(f"Expected table name but got {self._curr}") 3833 if not db and is_db_reference: 3834 self.raise_error(f"Expected database name but got {self._curr}") 3835 3836 table = self.expression( 3837 exp.Table, 3838 comments=comments, 3839 this=table, 3840 db=db, 3841 catalog=catalog, 3842 ) 3843 3844 changes = self._parse_changes() 3845 if changes: 3846 table.set("changes", changes) 3847 3848 at_before = self._parse_historical_data() 3849 if at_before: 3850 table.set("when", at_before) 3851 3852 pivots = self._parse_pivots() 3853 if pivots: 3854 table.set("pivots", pivots) 3855 3856 return table 3857 3858 def _parse_table( 3859 self, 3860 schema: bool = False, 3861 joins: bool = False, 3862 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3863 parse_bracket: bool = False, 3864 is_db_reference: bool = False, 3865 parse_partition: bool = False, 3866 ) -> t.Optional[exp.Expression]: 3867 lateral = self._parse_lateral() 3868 if lateral: 3869 return lateral 3870 3871 unnest = self._parse_unnest() 3872 if unnest: 3873 return unnest 3874 3875 values = self._parse_derived_table_values() 3876 if values: 3877 return values 3878 3879 subquery = self._parse_select(table=True) 3880 if subquery: 3881 if not subquery.args.get("pivots"): 3882 subquery.set("pivots", self._parse_pivots()) 3883 return subquery 3884 3885 bracket = parse_bracket and self._parse_bracket(None) 3886 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3887 3888 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3889 self._parse_table 3890 ) 3891 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3892 3893 only = self._match(TokenType.ONLY) 3894 3895 this = t.cast( 3896 exp.Expression, 3897 bracket 3898 or rows_from 3899 or self._parse_bracket( 3900 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3901 ), 3902 ) 3903 3904 if only: 3905 this.set("only", only) 3906 3907 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3908 self._match_text_seq("*") 3909 3910 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3911 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3912 this.set("partition", self._parse_partition()) 3913 3914 if schema: 3915 return self._parse_schema(this=this) 3916 3917 version = self._parse_version() 3918 3919 if version: 3920 this.set("version", version) 3921 3922 if self.dialect.ALIAS_POST_TABLESAMPLE: 3923 this.set("sample", self._parse_table_sample()) 3924 3925 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3926 if alias: 3927 this.set("alias", alias) 3928 3929 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3930 return self.expression( 3931 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3932 ) 3933 3934 this.set("hints", self._parse_table_hints()) 3935 3936 if not this.args.get("pivots"): 3937 this.set("pivots", self._parse_pivots()) 3938 3939 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3940 this.set("sample", self._parse_table_sample()) 3941 3942 if joins: 3943 for join in self._parse_joins(): 3944 this.append("joins", join) 3945 3946 if self._match_pair(TokenType.WITH, 
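# --- Illustrative example (added; not part of the parser source) -----------
# _parse_table_parts above peels dotted names into catalog/db/table. A sketch:
import sqlglot
from sqlglot import exp

table = sqlglot.parse_one("SELECT * FROM c.d.t").find(exp.Table)
assert (table.catalog, table.db, table.name) == ("c", "d", "t")
# ---------------------------------------------------------------------------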
TokenType.ORDINALITY): 3947 this.set("ordinality", True) 3948 this.set("alias", self._parse_table_alias()) 3949 3950 return this 3951 3952 def _parse_version(self) -> t.Optional[exp.Version]: 3953 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3954 this = "TIMESTAMP" 3955 elif self._match(TokenType.VERSION_SNAPSHOT): 3956 this = "VERSION" 3957 else: 3958 return None 3959 3960 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3961 kind = self._prev.text.upper() 3962 start = self._parse_bitwise() 3963 self._match_texts(("TO", "AND")) 3964 end = self._parse_bitwise() 3965 expression: t.Optional[exp.Expression] = self.expression( 3966 exp.Tuple, expressions=[start, end] 3967 ) 3968 elif self._match_text_seq("CONTAINED", "IN"): 3969 kind = "CONTAINED IN" 3970 expression = self.expression( 3971 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3972 ) 3973 elif self._match(TokenType.ALL): 3974 kind = "ALL" 3975 expression = None 3976 else: 3977 self._match_text_seq("AS", "OF") 3978 kind = "AS OF" 3979 expression = self._parse_type() 3980 3981 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3982 3983 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3984 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3985 index = self._index 3986 historical_data = None 3987 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3988 this = self._prev.text.upper() 3989 kind = ( 3990 self._match(TokenType.L_PAREN) 3991 and self._match_texts(self.HISTORICAL_DATA_KIND) 3992 and self._prev.text.upper() 3993 ) 3994 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3995 3996 if expression: 3997 self._match_r_paren() 3998 historical_data = self.expression( 3999 exp.HistoricalData, this=this, kind=kind, expression=expression 4000 ) 4001 else: 4002 self._retreat(index) 4003 4004 return historical_data 4005 4006 def _parse_changes(self) -> t.Optional[exp.Changes]: 4007 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4008 return None 4009 4010 information = self._parse_var(any_token=True) 4011 self._match_r_paren() 4012 4013 return self.expression( 4014 exp.Changes, 4015 information=information, 4016 at_before=self._parse_historical_data(), 4017 end=self._parse_historical_data(), 4018 ) 4019 4020 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4021 if not self._match(TokenType.UNNEST): 4022 return None 4023 4024 expressions = self._parse_wrapped_csv(self._parse_equality) 4025 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4026 4027 alias = self._parse_table_alias() if with_alias else None 4028 4029 if alias: 4030 if self.dialect.UNNEST_COLUMN_ONLY: 4031 if alias.args.get("columns"): 4032 self.raise_error("Unexpected extra column alias in unnest.") 4033 4034 alias.set("columns", [alias.this]) 4035 alias.set("this", None) 4036 4037 columns = alias.args.get("columns") or [] 4038 if offset and len(expressions) < len(columns): 4039 offset = columns.pop() 4040 4041 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4042 self._match(TokenType.ALIAS) 4043 offset = self._parse_id_var( 4044 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4045 ) or exp.to_identifier("offset") 4046 4047 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4048 4049 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4050 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4051 if not is_derived 
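# --- Illustrative example (added; not part of the parser source) -----------
# _parse_unnest above recognizes BigQuery's WITH OFFSET ordinality column.
# A sketch, assuming the "bigquery" dialect:
import sqlglot
from sqlglot import exp

q = sqlglot.parse_one("SELECT x FROM UNNEST([1, 2]) AS x WITH OFFSET AS pos", read="bigquery")
unnest = q.find(exp.Unnest)
# unnest.args["offset"] is the `pos` identifier; without AS it defaults to "offset"
# ---------------------------------------------------------------------------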
and not (
            # ClickHouse's `FORMAT Values` is equivalent to `VALUES`
            self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES")
        ):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        if not method and self.DEFAULT_SAMPLING_METHOD:
            method = exp.var(self.DEFAULT_SAMPLING_METHOD)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        return iter(self._parse_join, None)

    def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]:
        if not self._match(TokenType.INTO):
            return None

        return self.expression(
            exp.UnpivotColumns,
            this=self._match_text_seq("NAME") and self._parse_column(),
            expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column),
        )

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match(TokenType.IN):
                # PIVOT ... ON col IN (row_val1, row_val2)
                return self._parse_in(this)
            if self._match(TokenType.ALIAS, advance=False):
                # UNPIVOT ...
ON (col1, col2, col3) AS row_val 4158 return self._parse_alias(this) 4159 4160 return this 4161 4162 this = self._parse_table() 4163 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4164 into = self._parse_unpivot_columns() 4165 using = self._match(TokenType.USING) and self._parse_csv( 4166 lambda: self._parse_alias(self._parse_function()) 4167 ) 4168 group = self._parse_group() 4169 4170 return self.expression( 4171 exp.Pivot, 4172 this=this, 4173 expressions=expressions, 4174 using=using, 4175 group=group, 4176 unpivot=is_unpivot, 4177 into=into, 4178 ) 4179 4180 def _parse_pivot_in(self) -> exp.In | exp.PivotAny: 4181 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4182 this = self._parse_select_or_expression() 4183 4184 self._match(TokenType.ALIAS) 4185 alias = self._parse_bitwise() 4186 if alias: 4187 if isinstance(alias, exp.Column) and not alias.db: 4188 alias = alias.this 4189 return self.expression(exp.PivotAlias, this=this, alias=alias) 4190 4191 return this 4192 4193 value = self._parse_column() 4194 4195 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4196 self.raise_error("Expecting IN (") 4197 4198 if self._match(TokenType.ANY): 4199 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4200 else: 4201 exprs = self._parse_csv(_parse_aliased_expression) 4202 4203 self._match_r_paren() 4204 return self.expression(exp.In, this=value, expressions=exprs) 4205 4206 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4207 index = self._index 4208 include_nulls = None 4209 4210 if self._match(TokenType.PIVOT): 4211 unpivot = False 4212 elif self._match(TokenType.UNPIVOT): 4213 unpivot = True 4214 4215 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4216 if self._match_text_seq("INCLUDE", "NULLS"): 4217 include_nulls = True 4218 elif self._match_text_seq("EXCLUDE", "NULLS"): 4219 include_nulls = False 4220 else: 4221 return None 4222 4223 expressions = [] 4224 4225 if not self._match(TokenType.L_PAREN): 4226 self._retreat(index) 4227 return None 4228 4229 if unpivot: 4230 expressions = self._parse_csv(self._parse_column) 4231 else: 4232 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 4233 4234 if not expressions: 4235 self.raise_error("Failed to parse PIVOT's aggregation list") 4236 4237 if not self._match(TokenType.FOR): 4238 self.raise_error("Expecting FOR") 4239 4240 field = self._parse_pivot_in() 4241 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4242 self._parse_bitwise 4243 ) 4244 4245 self._match_r_paren() 4246 4247 pivot = self.expression( 4248 exp.Pivot, 4249 expressions=expressions, 4250 field=field, 4251 unpivot=unpivot, 4252 include_nulls=include_nulls, 4253 default_on_null=default_on_null, 4254 ) 4255 4256 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4257 pivot.set("alias", self._parse_table_alias()) 4258 4259 if not unpivot: 4260 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4261 4262 columns: t.List[exp.Expression] = [] 4263 pivot_field_expressions = pivot.args["field"].expressions 4264 4265 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 
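# [Editor's example] How `_parse_pivot`/`_parse_pivot_in` above shape the AST;
# a sketch assuming a stock install and the Snowflake dialect.
#
#   from sqlglot import exp, parse_one
#
#   sql = "SELECT * FROM t PIVOT(SUM(v) FOR k IN ('a', 'b'))"
#   pivot = parse_one(sql, read="snowflake").find(exp.Pivot)
#   # aggregations end up in `expressions`, the FOR clause in `field` (an exp.In)
#   assert isinstance(pivot.args["field"], exp.In)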
4266 if not isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4267 for fld in pivot_field_expressions: 4268 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4269 for name in names: 4270 if self.PREFIXED_PIVOT_COLUMNS: 4271 name = f"{name}_{field_name}" if name else field_name 4272 else: 4273 name = f"{field_name}_{name}" if name else field_name 4274 4275 columns.append(exp.to_identifier(name)) 4276 4277 pivot.set("columns", columns) 4278 4279 return pivot 4280 4281 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4282 return [agg.alias for agg in aggregations] 4283 4284 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4285 if not skip_where_token and not self._match(TokenType.PREWHERE): 4286 return None 4287 4288 return self.expression( 4289 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4290 ) 4291 4292 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4293 if not skip_where_token and not self._match(TokenType.WHERE): 4294 return None 4295 4296 return self.expression( 4297 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4298 ) 4299 4300 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4301 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4302 return None 4303 4304 elements: t.Dict[str, t.Any] = defaultdict(list) 4305 4306 if self._match(TokenType.ALL): 4307 elements["all"] = True 4308 elif self._match(TokenType.DISTINCT): 4309 elements["all"] = False 4310 4311 while True: 4312 index = self._index 4313 4314 elements["expressions"].extend( 4315 self._parse_csv( 4316 lambda: None 4317 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4318 else self._parse_assignment() 4319 ) 4320 ) 4321 4322 before_with_index = self._index 4323 with_prefix = self._match(TokenType.WITH) 4324 4325 if self._match(TokenType.ROLLUP): 4326 elements["rollup"].append( 4327 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4328 ) 4329 elif self._match(TokenType.CUBE): 4330 elements["cube"].append( 4331 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4332 ) 4333 elif self._match(TokenType.GROUPING_SETS): 4334 elements["grouping_sets"].append( 4335 self.expression( 4336 exp.GroupingSets, 4337 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4338 ) 4339 ) 4340 elif self._match_text_seq("TOTALS"): 4341 elements["totals"] = True # type: ignore 4342 4343 if before_with_index <= self._index <= before_with_index + 1: 4344 self._retreat(before_with_index) 4345 break 4346 4347 if index == self._index: 4348 break 4349 4350 return self.expression(exp.Group, **elements) # type: ignore 4351 4352 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4353 return self.expression( 4354 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4355 ) 4356 4357 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4358 if self._match(TokenType.L_PAREN): 4359 grouping_set = self._parse_csv(self._parse_column) 4360 self._match_r_paren() 4361 return self.expression(exp.Tuple, expressions=grouping_set) 4362 4363 return self._parse_column() 4364 4365 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4366 if not skip_having_token and not self._match(TokenType.HAVING): 4367 return None 4368 return self.expression(exp.Having, 
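# [Editor's example] The GROUP BY loop above collects plain keys, ROLLUP, CUBE
# and GROUPING SETS into separate args of exp.Group; ClickHouse's WITH TOTALS
# sets the `totals` flag. A sketch, assuming a stock install:
#
#   from sqlglot import exp, parse_one
#
#   group = parse_one("SELECT a, SUM(b) FROM t GROUP BY ROLLUP (a)").find(exp.Group)
#   assert group.args["rollup"] and not group.args.get("cube")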
this=self._parse_assignment()) 4369 4370 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4371 if not self._match(TokenType.QUALIFY): 4372 return None 4373 return self.expression(exp.Qualify, this=self._parse_assignment()) 4374 4375 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4376 if skip_start_token: 4377 start = None 4378 elif self._match(TokenType.START_WITH): 4379 start = self._parse_assignment() 4380 else: 4381 return None 4382 4383 self._match(TokenType.CONNECT_BY) 4384 nocycle = self._match_text_seq("NOCYCLE") 4385 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4386 exp.Prior, this=self._parse_bitwise() 4387 ) 4388 connect = self._parse_assignment() 4389 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4390 4391 if not start and self._match(TokenType.START_WITH): 4392 start = self._parse_assignment() 4393 4394 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4395 4396 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4397 this = self._parse_id_var(any_token=True) 4398 if self._match(TokenType.ALIAS): 4399 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4400 return this 4401 4402 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4403 if self._match_text_seq("INTERPOLATE"): 4404 return self._parse_wrapped_csv(self._parse_name_as_expression) 4405 return None 4406 4407 def _parse_order( 4408 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4409 ) -> t.Optional[exp.Expression]: 4410 siblings = None 4411 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4412 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4413 return this 4414 4415 siblings = True 4416 4417 return self.expression( 4418 exp.Order, 4419 this=this, 4420 expressions=self._parse_csv(self._parse_ordered), 4421 siblings=siblings, 4422 ) 4423 4424 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4425 if not self._match(token): 4426 return None 4427 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4428 4429 def _parse_ordered( 4430 self, parse_method: t.Optional[t.Callable] = None 4431 ) -> t.Optional[exp.Ordered]: 4432 this = parse_method() if parse_method else self._parse_assignment() 4433 if not this: 4434 return None 4435 4436 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4437 this = exp.var("ALL") 4438 4439 asc = self._match(TokenType.ASC) 4440 desc = self._match(TokenType.DESC) or (asc and False) 4441 4442 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4443 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4444 4445 nulls_first = is_nulls_first or False 4446 explicitly_null_ordered = is_nulls_first or is_nulls_last 4447 4448 if ( 4449 not explicitly_null_ordered 4450 and ( 4451 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4452 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4453 ) 4454 and self.dialect.NULL_ORDERING != "nulls_are_last" 4455 ): 4456 nulls_first = True 4457 4458 if self._match_text_seq("WITH", "FILL"): 4459 with_fill = self.expression( 4460 exp.WithFill, 4461 **{ # type: ignore 4462 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4463 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4464 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4465 "interpolate": self._parse_interpolate(), 4466 }, 4467 ) 4468 else: 4469 with_fill = None 4470 
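# [Editor's example] ClickHouse's ORDER BY ... WITH FILL, handled right above,
# is captured on the exp.Ordered node. A sketch assuming a stock install:
#
#   from sqlglot import exp, parse_one
#
#   sql = "SELECT x FROM t ORDER BY x WITH FILL FROM 1 TO 10 STEP 2"
#   ordered = parse_one(sql, read="clickhouse").find(exp.Ordered)
#   assert isinstance(ordered.args["with_fill"], exp.WithFill)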
4471 return self.expression( 4472 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4473 ) 4474 4475 def _parse_limit_options(self) -> exp.LimitOptions: 4476 percent = self._match(TokenType.PERCENT) 4477 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4478 self._match_text_seq("ONLY") 4479 with_ties = self._match_text_seq("WITH", "TIES") 4480 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4481 4482 def _parse_limit( 4483 self, 4484 this: t.Optional[exp.Expression] = None, 4485 top: bool = False, 4486 skip_limit_token: bool = False, 4487 ) -> t.Optional[exp.Expression]: 4488 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4489 comments = self._prev_comments 4490 if top: 4491 limit_paren = self._match(TokenType.L_PAREN) 4492 expression = self._parse_term() if limit_paren else self._parse_number() 4493 4494 if limit_paren: 4495 self._match_r_paren() 4496 4497 limit_options = self._parse_limit_options() 4498 else: 4499 limit_options = None 4500 expression = self._parse_term() 4501 4502 if self._match(TokenType.COMMA): 4503 offset = expression 4504 expression = self._parse_term() 4505 else: 4506 offset = None 4507 4508 limit_exp = self.expression( 4509 exp.Limit, 4510 this=this, 4511 expression=expression, 4512 offset=offset, 4513 comments=comments, 4514 limit_options=limit_options, 4515 expressions=self._parse_limit_by(), 4516 ) 4517 4518 return limit_exp 4519 4520 if self._match(TokenType.FETCH): 4521 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4522 direction = self._prev.text.upper() if direction else "FIRST" 4523 4524 count = self._parse_field(tokens=self.FETCH_TOKENS) 4525 4526 return self.expression( 4527 exp.Fetch, 4528 direction=direction, 4529 count=count, 4530 limit_options=self._parse_limit_options(), 4531 ) 4532 4533 return this 4534 4535 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4536 if not self._match(TokenType.OFFSET): 4537 return this 4538 4539 count = self._parse_term() 4540 self._match_set((TokenType.ROW, TokenType.ROWS)) 4541 4542 return self.expression( 4543 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4544 ) 4545 4546 def _can_parse_limit_or_offset(self) -> bool: 4547 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4548 return False 4549 4550 index = self._index 4551 result = bool( 4552 self._try_parse(self._parse_limit, retreat=True) 4553 or self._try_parse(self._parse_offset, retreat=True) 4554 ) 4555 self._retreat(index) 4556 return result 4557 4558 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4559 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4560 4561 def _parse_locks(self) -> t.List[exp.Lock]: 4562 locks = [] 4563 while True: 4564 if self._match_text_seq("FOR", "UPDATE"): 4565 update = True 4566 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4567 "LOCK", "IN", "SHARE", "MODE" 4568 ): 4569 update = False 4570 else: 4571 break 4572 4573 expressions = None 4574 if self._match_text_seq("OF"): 4575 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4576 4577 wait: t.Optional[bool | exp.Expression] = None 4578 if self._match_text_seq("NOWAIT"): 4579 wait = True 4580 elif self._match_text_seq("WAIT"): 4581 wait = self._parse_primary() 4582 elif self._match_text_seq("SKIP", "LOCKED"): 4583 wait = False 4584 4585 locks.append( 4586 self.expression(exp.Lock, 
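# [Editor's example] The COMMA branch in `_parse_limit` above gives MySQL's
# `LIMIT <offset>, <count>` its canonical shape. A sketch, assuming a stock
# install:
#
#   from sqlglot import exp, parse_one
#
#   limit = parse_one("SELECT * FROM t LIMIT 5, 10", read="mysql").find(exp.Limit)
#   assert limit.args["offset"].name == "5" and limit.expression.name == "10"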
update=update, expressions=expressions, wait=wait) 4587 ) 4588 4589 return locks 4590 4591 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4592 while this and self._match_set(self.SET_OPERATIONS): 4593 token_type = self._prev.token_type 4594 4595 if token_type == TokenType.UNION: 4596 operation: t.Type[exp.SetOperation] = exp.Union 4597 elif token_type == TokenType.EXCEPT: 4598 operation = exp.Except 4599 else: 4600 operation = exp.Intersect 4601 4602 comments = self._prev.comments 4603 4604 if self._match(TokenType.DISTINCT): 4605 distinct: t.Optional[bool] = True 4606 elif self._match(TokenType.ALL): 4607 distinct = False 4608 else: 4609 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4610 if distinct is None: 4611 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4612 4613 by_name = self._match_text_seq("BY", "NAME") 4614 expression = self._parse_select(nested=True, parse_set_operation=False) 4615 4616 this = self.expression( 4617 operation, 4618 comments=comments, 4619 this=this, 4620 distinct=distinct, 4621 by_name=by_name, 4622 expression=expression, 4623 ) 4624 4625 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4626 expression = this.expression 4627 4628 if expression: 4629 for arg in self.SET_OP_MODIFIERS: 4630 expr = expression.args.get(arg) 4631 if expr: 4632 this.set(arg, expr.pop()) 4633 4634 return this 4635 4636 def _parse_expression(self) -> t.Optional[exp.Expression]: 4637 return self._parse_alias(self._parse_assignment()) 4638 4639 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4640 this = self._parse_disjunction() 4641 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4642 # This allows us to parse <non-identifier token> := <expr> 4643 this = exp.column( 4644 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4645 ) 4646 4647 while self._match_set(self.ASSIGNMENT): 4648 if isinstance(this, exp.Column) and len(this.parts) == 1: 4649 this = this.this 4650 4651 this = self.expression( 4652 self.ASSIGNMENT[self._prev.token_type], 4653 this=this, 4654 comments=self._prev_comments, 4655 expression=self._parse_assignment(), 4656 ) 4657 4658 return this 4659 4660 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4661 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4662 4663 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4664 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4665 4666 def _parse_equality(self) -> t.Optional[exp.Expression]: 4667 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4668 4669 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4670 return self._parse_tokens(self._parse_range, self.COMPARISON) 4671 4672 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4673 this = this or self._parse_bitwise() 4674 negate = self._match(TokenType.NOT) 4675 4676 if self._match_set(self.RANGE_PARSERS): 4677 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4678 if not expression: 4679 return this 4680 4681 this = expression 4682 elif self._match(TokenType.ISNULL): 4683 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4684 4685 # Postgres supports ISNULL and NOTNULL for conditions. 
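# [Editor's example] UNION/EXCEPT/INTERSECT above default `distinct` from the
# dialect when neither DISTINCT nor ALL is written. Note also the precedence
# ladder just above: assignment -> OR -> AND -> equality -> comparison ->
# range -> bitwise, each level delegating to the next. A sketch, assuming a
# stock install:
#
#   from sqlglot import exp, parse_one
#
#   union = parse_one("SELECT 1 UNION SELECT 2")
#   assert isinstance(union, exp.Union) and union.args["distinct"] is True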
4686 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4687 if self._match(TokenType.NOTNULL): 4688 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4689 this = self.expression(exp.Not, this=this) 4690 4691 if negate: 4692 this = self._negate_range(this) 4693 4694 if self._match(TokenType.IS): 4695 this = self._parse_is(this) 4696 4697 return this 4698 4699 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4700 if not this: 4701 return this 4702 4703 return self.expression(exp.Not, this=this) 4704 4705 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4706 index = self._index - 1 4707 negate = self._match(TokenType.NOT) 4708 4709 if self._match_text_seq("DISTINCT", "FROM"): 4710 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4711 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4712 4713 if self._match(TokenType.JSON): 4714 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4715 4716 if self._match_text_seq("WITH"): 4717 _with = True 4718 elif self._match_text_seq("WITHOUT"): 4719 _with = False 4720 else: 4721 _with = None 4722 4723 unique = self._match(TokenType.UNIQUE) 4724 self._match_text_seq("KEYS") 4725 expression: t.Optional[exp.Expression] = self.expression( 4726 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4727 ) 4728 else: 4729 expression = self._parse_primary() or self._parse_null() 4730 if not expression: 4731 self._retreat(index) 4732 return None 4733 4734 this = self.expression(exp.Is, this=this, expression=expression) 4735 return self.expression(exp.Not, this=this) if negate else this 4736 4737 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4738 unnest = self._parse_unnest(with_alias=False) 4739 if unnest: 4740 this = self.expression(exp.In, this=this, unnest=unnest) 4741 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4742 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4743 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4744 4745 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4746 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4747 else: 4748 this = self.expression(exp.In, this=this, expressions=expressions) 4749 4750 if matched_l_paren: 4751 self._match_r_paren(this) 4752 elif not self._match(TokenType.R_BRACKET, expression=this): 4753 self.raise_error("Expecting ]") 4754 else: 4755 this = self.expression(exp.In, this=this, field=self._parse_column()) 4756 4757 return this 4758 4759 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4760 low = self._parse_bitwise() 4761 self._match(TokenType.AND) 4762 high = self._parse_bitwise() 4763 return self.expression(exp.Between, this=this, low=low, high=high) 4764 4765 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4766 if not self._match(TokenType.ESCAPE): 4767 return this 4768 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4769 4770 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4771 index = self._index 4772 4773 if not self._match(TokenType.INTERVAL) and match_interval: 4774 return None 4775 4776 if self._match(TokenType.STRING, advance=False): 4777 this = self._parse_primary() 4778 else: 4779 this = self._parse_term() 4780 4781 if not 
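# [Editor's example] `IS [NOT] DISTINCT FROM`, handled in `_parse_is` above,
# becomes a null-safe comparison node. A sketch, assuming a stock install:
#
#   from sqlglot import exp, parse_one
#
#   assert isinstance(parse_one("a IS DISTINCT FROM b"), exp.NullSafeNEQ)
#   assert isinstance(parse_one("a IS NOT DISTINCT FROM b"), exp.NullSafeEQ)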
this or ( 4782 isinstance(this, exp.Column) 4783 and not this.table 4784 and not this.this.quoted 4785 and this.name.upper() == "IS" 4786 ): 4787 self._retreat(index) 4788 return None 4789 4790 unit = self._parse_function() or ( 4791 not self._match(TokenType.ALIAS, advance=False) 4792 and self._parse_var(any_token=True, upper=True) 4793 ) 4794 4795 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4796 # each INTERVAL expression into this canonical form so it's easy to transpile 4797 if this and this.is_number: 4798 this = exp.Literal.string(this.to_py()) 4799 elif this and this.is_string: 4800 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4801 if parts and unit: 4802 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4803 unit = None 4804 self._retreat(self._index - 1) 4805 4806 if len(parts) == 1: 4807 this = exp.Literal.string(parts[0][0]) 4808 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4809 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4810 unit = self.expression( 4811 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4812 ) 4813 4814 interval = self.expression(exp.Interval, this=this, unit=unit) 4815 4816 index = self._index 4817 self._match(TokenType.PLUS) 4818 4819 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4820 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4821 return self.expression( 4822 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4823 ) 4824 4825 self._retreat(index) 4826 return interval 4827 4828 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4829 this = self._parse_term() 4830 4831 while True: 4832 if self._match_set(self.BITWISE): 4833 this = self.expression( 4834 self.BITWISE[self._prev.token_type], 4835 this=this, 4836 expression=self._parse_term(), 4837 ) 4838 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4839 this = self.expression( 4840 exp.DPipe, 4841 this=this, 4842 expression=self._parse_term(), 4843 safe=not self.dialect.STRICT_STRING_CONCAT, 4844 ) 4845 elif self._match(TokenType.DQMARK): 4846 this = self.expression( 4847 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4848 ) 4849 elif self._match_pair(TokenType.LT, TokenType.LT): 4850 this = self.expression( 4851 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4852 ) 4853 elif self._match_pair(TokenType.GT, TokenType.GT): 4854 this = self.expression( 4855 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4856 ) 4857 else: 4858 break 4859 4860 return this 4861 4862 def _parse_term(self) -> t.Optional[exp.Expression]: 4863 this = self._parse_factor() 4864 4865 while self._match_set(self.TERM): 4866 klass = self.TERM[self._prev.token_type] 4867 comments = self._prev_comments 4868 expression = self._parse_factor() 4869 4870 this = self.expression(klass, this=this, comments=comments, expression=expression) 4871 4872 if isinstance(this, exp.Collate): 4873 expr = this.expression 4874 4875 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 4876 # fallback to Identifier / Var 4877 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4878 ident = expr.this 4879 if isinstance(ident, exp.Identifier): 4880 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4881 4882 return this 4883 4884 def _parse_factor(self) -> t.Optional[exp.Expression]: 4885 parse_method = 
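# [Editor's example] `_parse_interval` above canonicalizes INTERVAL literals
# (a numeric `5` becomes the string '5', so `INTERVAL '5' DAY` style output is
# easy to generate) and folds chained parts into a sum of intervals. A sketch,
# assuming a stock install:
#
#   from sqlglot import exp, parse_one
#
#   interval = parse_one("SELECT INTERVAL 5 DAY").find(exp.Interval)
#   assert interval.this.is_string and interval.this.name == "5"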
self._parse_exponent if self.EXPONENT else self._parse_unary 4886 this = parse_method() 4887 4888 while self._match_set(self.FACTOR): 4889 klass = self.FACTOR[self._prev.token_type] 4890 comments = self._prev_comments 4891 expression = parse_method() 4892 4893 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4894 self._retreat(self._index - 1) 4895 return this 4896 4897 this = self.expression(klass, this=this, comments=comments, expression=expression) 4898 4899 if isinstance(this, exp.Div): 4900 this.args["typed"] = self.dialect.TYPED_DIVISION 4901 this.args["safe"] = self.dialect.SAFE_DIVISION 4902 4903 return this 4904 4905 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4906 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4907 4908 def _parse_unary(self) -> t.Optional[exp.Expression]: 4909 if self._match_set(self.UNARY_PARSERS): 4910 return self.UNARY_PARSERS[self._prev.token_type](self) 4911 return self._parse_at_time_zone(self._parse_type()) 4912 4913 def _parse_type( 4914 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4915 ) -> t.Optional[exp.Expression]: 4916 interval = parse_interval and self._parse_interval() 4917 if interval: 4918 return interval 4919 4920 index = self._index 4921 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4922 4923 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 4924 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 4925 if isinstance(data_type, exp.Cast): 4926 # This constructor can contain ops directly after it, for instance struct unnesting: 4927 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 4928 return self._parse_column_ops(data_type) 4929 4930 if data_type: 4931 index2 = self._index 4932 this = self._parse_primary() 4933 4934 if isinstance(this, exp.Literal): 4935 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4936 if parser: 4937 return parser(self, this, data_type) 4938 4939 return self.expression(exp.Cast, this=this, to=data_type) 4940 4941 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4942 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4943 # 4944 # If the index difference here is greater than 1, that means the parser itself must have 4945 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4946 # 4947 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4948 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4949 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4950 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 4951 # 4952 # In these cases, we don't really want to return the converted type, but instead retreat 4953 # and try to parse a Column or Identifier in the section below.
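# [Editor's example] A sketch of the retreat described above, assuming a stock
# install: a type keyword that is really a column reference should come back
# as exp.Column, not as the converted DataType. The Snowflake behavior shown
# is an assumption based on the TYPE_CONVERTERS note above.
#
#   from sqlglot import exp, parse_one
#
#   col = parse_one("SELECT decimal FROM t", read="snowflake").find(exp.Column)
#   assert col is not None and col.name == "decimal"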
4954 if data_type.expressions and index2 - index > 1: 4955 self._retreat(index2) 4956 return self._parse_column_ops(data_type) 4957 4958 self._retreat(index) 4959 4960 if fallback_to_identifier: 4961 return self._parse_id_var() 4962 4963 this = self._parse_column() 4964 return this and self._parse_column_ops(this) 4965 4966 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4967 this = self._parse_type() 4968 if not this: 4969 return None 4970 4971 if isinstance(this, exp.Column) and not this.table: 4972 this = exp.var(this.name.upper()) 4973 4974 return self.expression( 4975 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4976 ) 4977 4978 def _parse_types( 4979 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4980 ) -> t.Optional[exp.Expression]: 4981 index = self._index 4982 4983 this: t.Optional[exp.Expression] = None 4984 prefix = self._match_text_seq("SYSUDTLIB", ".") 4985 4986 if not self._match_set(self.TYPE_TOKENS): 4987 identifier = allow_identifiers and self._parse_id_var( 4988 any_token=False, tokens=(TokenType.VAR,) 4989 ) 4990 if isinstance(identifier, exp.Identifier): 4991 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4992 4993 if len(tokens) != 1: 4994 self.raise_error("Unexpected identifier", self._prev) 4995 4996 if tokens[0].token_type in self.TYPE_TOKENS: 4997 self._prev = tokens[0] 4998 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4999 type_name = identifier.name 5000 5001 while self._match(TokenType.DOT): 5002 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5003 5004 this = exp.DataType.build(type_name, udt=True) 5005 else: 5006 self._retreat(self._index - 1) 5007 return None 5008 else: 5009 return None 5010 5011 type_token = self._prev.token_type 5012 5013 if type_token == TokenType.PSEUDO_TYPE: 5014 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5015 5016 if type_token == TokenType.OBJECT_IDENTIFIER: 5017 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5018 5019 # https://materialize.com/docs/sql/types/map/ 5020 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5021 key_type = self._parse_types( 5022 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5023 ) 5024 if not self._match(TokenType.FARROW): 5025 self._retreat(index) 5026 return None 5027 5028 value_type = self._parse_types( 5029 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5030 ) 5031 if not self._match(TokenType.R_BRACKET): 5032 self._retreat(index) 5033 return None 5034 5035 return exp.DataType( 5036 this=exp.DataType.Type.MAP, 5037 expressions=[key_type, value_type], 5038 nested=True, 5039 prefix=prefix, 5040 ) 5041 5042 nested = type_token in self.NESTED_TYPE_TOKENS 5043 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5044 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5045 expressions = None 5046 maybe_func = False 5047 5048 if self._match(TokenType.L_PAREN): 5049 if is_struct: 5050 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5051 elif nested: 5052 expressions = self._parse_csv( 5053 lambda: self._parse_types( 5054 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5055 ) 5056 ) 5057 if type_token == TokenType.NULLABLE and len(expressions) == 1: 5058 this = expressions[0] 5059 this.set("nullable", True) 5060 self._match_r_paren() 5061 return this 5062 elif type_token in self.ENUM_TYPE_TOKENS: 5063 
expressions = self._parse_csv(self._parse_equality) 5064 elif is_aggregate: 5065 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5066 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5067 ) 5068 if not func_or_ident: 5069 return None 5070 expressions = [func_or_ident] 5071 if self._match(TokenType.COMMA): 5072 expressions.extend( 5073 self._parse_csv( 5074 lambda: self._parse_types( 5075 check_func=check_func, 5076 schema=schema, 5077 allow_identifiers=allow_identifiers, 5078 ) 5079 ) 5080 ) 5081 else: 5082 expressions = self._parse_csv(self._parse_type_size) 5083 5084 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5085 if type_token == TokenType.VECTOR and len(expressions) == 2: 5086 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 5087 5088 if not expressions or not self._match(TokenType.R_PAREN): 5089 self._retreat(index) 5090 return None 5091 5092 maybe_func = True 5093 5094 values: t.Optional[t.List[exp.Expression]] = None 5095 5096 if nested and self._match(TokenType.LT): 5097 if is_struct: 5098 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5099 else: 5100 expressions = self._parse_csv( 5101 lambda: self._parse_types( 5102 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5103 ) 5104 ) 5105 5106 if not self._match(TokenType.GT): 5107 self.raise_error("Expecting >") 5108 5109 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5110 values = self._parse_csv(self._parse_assignment) 5111 if not values and is_struct: 5112 values = None 5113 self._retreat(self._index - 1) 5114 else: 5115 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5116 5117 if type_token in self.TIMESTAMPS: 5118 if self._match_text_seq("WITH", "TIME", "ZONE"): 5119 maybe_func = False 5120 tz_type = ( 5121 exp.DataType.Type.TIMETZ 5122 if type_token in self.TIMES 5123 else exp.DataType.Type.TIMESTAMPTZ 5124 ) 5125 this = exp.DataType(this=tz_type, expressions=expressions) 5126 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5127 maybe_func = False 5128 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5129 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5130 maybe_func = False 5131 elif type_token == TokenType.INTERVAL: 5132 unit = self._parse_var(upper=True) 5133 if unit: 5134 if self._match_text_seq("TO"): 5135 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5136 5137 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5138 else: 5139 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5140 5141 if maybe_func and check_func: 5142 index2 = self._index 5143 peek = self._parse_string() 5144 5145 if not peek: 5146 self._retreat(index) 5147 return None 5148 5149 self._retreat(index2) 5150 5151 if not this: 5152 if self._match_text_seq("UNSIGNED"): 5153 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5154 if not unsigned_type_token: 5155 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5156 5157 type_token = unsigned_type_token or type_token 5158 5159 this = exp.DataType( 5160 this=exp.DataType.Type[type_token.value], 5161 expressions=expressions, 5162 nested=nested, 5163 prefix=prefix, 5164 ) 5165 5166 # Empty arrays/structs are allowed 5167 if values is not None: 5168 cls = exp.Struct if is_struct else exp.Array 5169 this = exp.cast(cls(expressions=values), this, copy=False) 5170 5171 elif 
expressions: 5172 this.set("expressions", expressions) 5173 5174 # https://materialize.com/docs/sql/types/list/#type-name 5175 while self._match(TokenType.LIST): 5176 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5177 5178 index = self._index 5179 5180 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5181 matched_array = self._match(TokenType.ARRAY) 5182 5183 while self._curr: 5184 datatype_token = self._prev.token_type 5185 matched_l_bracket = self._match(TokenType.L_BRACKET) 5186 5187 if (not matched_l_bracket and not matched_array) or ( 5188 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5189 ): 5190 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5191 # not to be confused with the fixed size array parsing 5192 break 5193 5194 matched_array = False 5195 values = self._parse_csv(self._parse_assignment) or None 5196 if ( 5197 values 5198 and not schema 5199 and ( 5200 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5201 ) 5202 ): 5203 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5204 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5205 self._retreat(index) 5206 break 5207 5208 this = exp.DataType( 5209 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5210 ) 5211 self._match(TokenType.R_BRACKET) 5212 5213 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5214 converter = self.TYPE_CONVERTERS.get(this.this) 5215 if converter: 5216 this = converter(t.cast(exp.DataType, this)) 5217 5218 return this 5219 5220 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5221 index = self._index 5222 5223 if ( 5224 self._curr 5225 and self._next 5226 and self._curr.token_type in self.TYPE_TOKENS 5227 and self._next.token_type in self.TYPE_TOKENS 5228 ): 5229 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5230 # type token. 
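# [Editor's example] The loop above distinguishes fixed-size array *types*
# from array *literals*: `INT[3]` (or Postgres' `INT ARRAY[3]`) stays a
# DataType, while DuckDB's `ARRAY[1]` retreats and parses as exp.Array.
# A sketch, assuming a stock install:
#
#   from sqlglot import exp, parse_one
#
#   cast = parse_one("SELECT CAST(x AS INT[3])", read="duckdb").find(exp.Cast)
#   assert cast.to.is_type(exp.DataType.Type.ARRAY)
#   assert parse_one("SELECT ARRAY[1]", read="duckdb").find(exp.Array) is not None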
Without this, the list will be parsed as a type and we'll eventually crash 5231 this = self._parse_id_var() 5232 else: 5233 this = ( 5234 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5235 or self._parse_id_var() 5236 ) 5237 5238 self._match(TokenType.COLON) 5239 5240 if ( 5241 type_required 5242 and not isinstance(this, exp.DataType) 5243 and not self._match_set(self.TYPE_TOKENS, advance=False) 5244 ): 5245 self._retreat(index) 5246 return self._parse_types() 5247 5248 return self._parse_column_def(this) 5249 5250 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5251 if not self._match_text_seq("AT", "TIME", "ZONE"): 5252 return this 5253 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5254 5255 def _parse_column(self) -> t.Optional[exp.Expression]: 5256 this = self._parse_column_reference() 5257 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5258 5259 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5260 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5261 5262 return column 5263 5264 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5265 this = self._parse_field() 5266 if ( 5267 not this 5268 and self._match(TokenType.VALUES, advance=False) 5269 and self.VALUES_FOLLOWED_BY_PAREN 5270 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5271 ): 5272 this = self._parse_id_var() 5273 5274 if isinstance(this, exp.Identifier): 5275 # We bubble up comments from the Identifier to the Column 5276 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5277 5278 return this 5279 5280 def _parse_colon_as_variant_extract( 5281 self, this: t.Optional[exp.Expression] 5282 ) -> t.Optional[exp.Expression]: 5283 casts = [] 5284 json_path = [] 5285 escape = None 5286 5287 while self._match(TokenType.COLON): 5288 start_index = self._index 5289 5290 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5291 path = self._parse_column_ops( 5292 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5293 ) 5294 5295 # The cast :: operator has a lower precedence than the extraction operator :, so 5296 # we rearrange the AST appropriately to avoid casting the JSON path 5297 while isinstance(path, exp.Cast): 5298 casts.append(path.to) 5299 path = path.this 5300 5301 if casts: 5302 dcolon_offset = next( 5303 i 5304 for i, t in enumerate(self._tokens[start_index:]) 5305 if t.token_type == TokenType.DCOLON 5306 ) 5307 end_token = self._tokens[start_index + dcolon_offset - 1] 5308 else: 5309 end_token = self._prev 5310 5311 if path: 5312 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5313 # it'll roundtrip to a string literal in GET_PATH 5314 if isinstance(path, exp.Identifier) and path.quoted: 5315 escape = True 5316 5317 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5318 5319 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5320 # Databricks transforms it back to the colon/dot notation 5321 if json_path: 5322 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5323 5324 if json_path_expr: 5325 json_path_expr.set("escape", escape) 5326 5327 this = self.expression( 5328 exp.JSONExtract, 5329 this=this, 5330 expression=json_path_expr, 5331 variant_extract=True, 5332 ) 5333 5334 while casts: 5335 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5336 5337 return this 5338 5339 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5340 return self._parse_types() 5341 5342 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5343 this = self._parse_bracket(this) 5344 5345 while self._match_set(self.COLUMN_OPERATORS): 5346 op_token = self._prev.token_type 5347 op = self.COLUMN_OPERATORS.get(op_token) 5348 5349 if op_token in (TokenType.DCOLON, TokenType.DOTCOLON): 5350 field = self._parse_dcolon() 5351 if not field: 5352 self.raise_error("Expected type") 5353 elif op and self._curr: 5354 field = self._parse_column_reference() or self._parse_bracket() 5355 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5356 field = self._parse_column_ops(field) 5357 else: 5358 field = self._parse_field(any_token=True, anonymous_func=True) 5359 5360 if isinstance(field, (exp.Func, exp.Window)) and this: 5361 # BQ & snowflake allow function calls like x.y.count(...), SAFE.SUBSTR(...) 
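# [Editor's example] The colon handling above turns Snowflake VARIANT
# extraction into an exp.JSONExtract, re-attaching any `::` casts so they
# apply to the extracted value rather than the JSON path. A sketch, assuming a
# stock install (the exact output shape may vary by version):
#
#   from sqlglot import exp, parse_one
#
#   ast = parse_one("SELECT v:a.b::int FROM t", read="snowflake")
#   cast = ast.find(exp.Cast)
#   # the Cast wraps the JSONExtract, not the other way around
#   assert isinstance(cast.this, exp.JSONExtract)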
etc 5362 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5363 this = exp.replace_tree( 5364 this, 5365 lambda n: ( 5366 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5367 if n.table 5368 else n.this 5369 ) 5370 if isinstance(n, exp.Column) 5371 else n, 5372 ) 5373 5374 if op: 5375 this = op(self, this, field) 5376 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5377 this = self.expression( 5378 exp.Column, 5379 comments=this.comments, 5380 this=field, 5381 table=this.this, 5382 db=this.args.get("table"), 5383 catalog=this.args.get("db"), 5384 ) 5385 elif isinstance(field, exp.Window): 5386 # Move the exp.Dot's to the window's function 5387 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5388 field.set("this", window_func) 5389 this = field 5390 else: 5391 this = self.expression(exp.Dot, this=this, expression=field) 5392 5393 if field and field.comments: 5394 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5395 5396 this = self._parse_bracket(this) 5397 5398 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5399 5400 def _parse_primary(self) -> t.Optional[exp.Expression]: 5401 if self._match_set(self.PRIMARY_PARSERS): 5402 token_type = self._prev.token_type 5403 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5404 5405 if token_type == TokenType.STRING: 5406 expressions = [primary] 5407 while self._match(TokenType.STRING): 5408 expressions.append(exp.Literal.string(self._prev.text)) 5409 5410 if len(expressions) > 1: 5411 return self.expression(exp.Concat, expressions=expressions) 5412 5413 return primary 5414 5415 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5416 return exp.Literal.number(f"0.{self._prev.text}") 5417 5418 if self._match(TokenType.L_PAREN): 5419 comments = self._prev_comments 5420 query = self._parse_select() 5421 5422 if query: 5423 expressions = [query] 5424 else: 5425 expressions = self._parse_expressions() 5426 5427 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5428 5429 if not this and self._match(TokenType.R_PAREN, advance=False): 5430 this = self.expression(exp.Tuple) 5431 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5432 this = self._parse_subquery(this=this, parse_alias=False) 5433 elif isinstance(this, exp.Subquery): 5434 this = self._parse_subquery( 5435 this=self._parse_set_operations(this), parse_alias=False 5436 ) 5437 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5438 this = self.expression(exp.Tuple, expressions=expressions) 5439 else: 5440 this = self.expression(exp.Paren, this=this) 5441 5442 if this: 5443 this.add_comments(comments) 5444 5445 self._match_r_paren(expression=this) 5446 return this 5447 5448 return None 5449 5450 def _parse_field( 5451 self, 5452 any_token: bool = False, 5453 tokens: t.Optional[t.Collection[TokenType]] = None, 5454 anonymous_func: bool = False, 5455 ) -> t.Optional[exp.Expression]: 5456 if anonymous_func: 5457 field = ( 5458 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5459 or self._parse_primary() 5460 ) 5461 else: 5462 field = self._parse_primary() or self._parse_function( 5463 anonymous=anonymous_func, any_token=any_token 5464 ) 5465 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5466 5467 def _parse_function( 5468 self, 5469 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5470 anonymous: bool = False, 5471 optional_parens: 
bool = True, 5472 any_token: bool = False, 5473 ) -> t.Optional[exp.Expression]: 5474 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5475 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5476 fn_syntax = False 5477 if ( 5478 self._match(TokenType.L_BRACE, advance=False) 5479 and self._next 5480 and self._next.text.upper() == "FN" 5481 ): 5482 self._advance(2) 5483 fn_syntax = True 5484 5485 func = self._parse_function_call( 5486 functions=functions, 5487 anonymous=anonymous, 5488 optional_parens=optional_parens, 5489 any_token=any_token, 5490 ) 5491 5492 if fn_syntax: 5493 self._match(TokenType.R_BRACE) 5494 5495 return func 5496 5497 def _parse_function_call( 5498 self, 5499 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5500 anonymous: bool = False, 5501 optional_parens: bool = True, 5502 any_token: bool = False, 5503 ) -> t.Optional[exp.Expression]: 5504 if not self._curr: 5505 return None 5506 5507 comments = self._curr.comments 5508 token_type = self._curr.token_type 5509 this = self._curr.text 5510 upper = this.upper() 5511 5512 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5513 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5514 self._advance() 5515 return self._parse_window(parser(self)) 5516 5517 if not self._next or self._next.token_type != TokenType.L_PAREN: 5518 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5519 self._advance() 5520 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5521 5522 return None 5523 5524 if any_token: 5525 if token_type in self.RESERVED_TOKENS: 5526 return None 5527 elif token_type not in self.FUNC_TOKENS: 5528 return None 5529 5530 self._advance(2) 5531 5532 parser = self.FUNCTION_PARSERS.get(upper) 5533 if parser and not anonymous: 5534 this = parser(self) 5535 else: 5536 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5537 5538 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5539 this = self.expression( 5540 subquery_predicate, comments=comments, this=self._parse_select() 5541 ) 5542 self._match_r_paren() 5543 return this 5544 5545 if functions is None: 5546 functions = self.FUNCTIONS 5547 5548 function = functions.get(upper) 5549 known_function = function and not anonymous 5550 5551 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5552 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5553 5554 post_func_comments = self._curr and self._curr.comments 5555 if known_function and post_func_comments: 5556 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5557 # call we'll construct it as exp.Anonymous, even if it's "known" 5558 if any( 5559 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5560 for comment in post_func_comments 5561 ): 5562 known_function = False 5563 5564 if alias and known_function: 5565 args = self._kv_to_prop_eq(args) 5566 5567 if known_function: 5568 func_builder = t.cast(t.Callable, function) 5569 5570 if "dialect" in func_builder.__code__.co_varnames: 5571 func = func_builder(args, dialect=self.dialect) 5572 else: 5573 func = func_builder(args) 5574 5575 func = self.validate_expression(func, args) 5576 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5577 func.meta["name"] = this 5578 5579 this = func 5580 else: 5581 if token_type == TokenType.IDENTIFIER: 5582 this = exp.Identifier(this=this, quoted=True) 5583 this = self.expression(exp.Anonymous, this=this, expressions=args) 5584 
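# [Editor's example] The `{fn ...}` handling above strips the ODBC escape
# wrapper and parses the inner call normally. A sketch, assuming a stock
# install and the MySQL dialect mentioned in the comment above:
#
#   from sqlglot import exp, parse_one
#
#   func = parse_one("SELECT {fn CONCAT('a', 'b')}", read="mysql").find(exp.Concat)
#   assert func is not None  # the braces leave no trace in the AST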
5585 if isinstance(this, exp.Expression): 5586 this.add_comments(comments) 5587 5588 self._match_r_paren(this) 5589 return self._parse_window(this) 5590 5591 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5592 return expression 5593 5594 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5595 transformed = [] 5596 5597 for index, e in enumerate(expressions): 5598 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5599 if isinstance(e, exp.Alias): 5600 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5601 5602 if not isinstance(e, exp.PropertyEQ): 5603 e = self.expression( 5604 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5605 ) 5606 5607 if isinstance(e.this, exp.Column): 5608 e.this.replace(e.this.this) 5609 else: 5610 e = self._to_prop_eq(e, index) 5611 5612 transformed.append(e) 5613 5614 return transformed 5615 5616 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5617 return self._parse_statement() 5618 5619 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5620 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5621 5622 def _parse_user_defined_function( 5623 self, kind: t.Optional[TokenType] = None 5624 ) -> t.Optional[exp.Expression]: 5625 this = self._parse_table_parts(schema=True) 5626 5627 if not self._match(TokenType.L_PAREN): 5628 return this 5629 5630 expressions = self._parse_csv(self._parse_function_parameter) 5631 self._match_r_paren() 5632 return self.expression( 5633 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5634 ) 5635 5636 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5637 literal = self._parse_primary() 5638 if literal: 5639 return self.expression(exp.Introducer, this=token.text, expression=literal) 5640 5641 return self.expression(exp.Identifier, this=token.text) 5642 5643 def _parse_session_parameter(self) -> exp.SessionParameter: 5644 kind = None 5645 this = self._parse_id_var() or self._parse_primary() 5646 5647 if this and self._match(TokenType.DOT): 5648 kind = this.name 5649 this = self._parse_var() or self._parse_primary() 5650 5651 return self.expression(exp.SessionParameter, this=this, kind=kind) 5652 5653 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5654 return self._parse_id_var() 5655 5656 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5657 index = self._index 5658 5659 if self._match(TokenType.L_PAREN): 5660 expressions = t.cast( 5661 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5662 ) 5663 5664 if not self._match(TokenType.R_PAREN): 5665 self._retreat(index) 5666 else: 5667 expressions = [self._parse_lambda_arg()] 5668 5669 if self._match_set(self.LAMBDAS): 5670 return self.LAMBDAS[self._prev.token_type](self, expressions) 5671 5672 self._retreat(index) 5673 5674 this: t.Optional[exp.Expression] 5675 5676 if self._match(TokenType.DISTINCT): 5677 this = self.expression( 5678 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5679 ) 5680 else: 5681 this = self._parse_select_or_expression(alias=alias) 5682 5683 return self._parse_limit( 5684 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5685 ) 5686 5687 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5688 index = self._index 5689 if not 
self._match(TokenType.L_PAREN): 5690 return this 5691 5692 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 5693 # expr can be of both types 5694 if self._match_set(self.SELECT_START_TOKENS): 5695 self._retreat(index) 5696 return this 5697 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5698 self._match_r_paren() 5699 return self.expression(exp.Schema, this=this, expressions=args) 5700 5701 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5702 return self._parse_column_def(self._parse_field(any_token=True)) 5703 5704 def _parse_column_def( 5705 self, this: t.Optional[exp.Expression], computed_column: bool = True 5706 ) -> t.Optional[exp.Expression]: 5707 # column defs are not really columns, they're identifiers 5708 if isinstance(this, exp.Column): 5709 this = this.this 5710 5711 if not computed_column: 5712 self._match(TokenType.ALIAS) 5713 5714 kind = self._parse_types(schema=True) 5715 5716 if self._match_text_seq("FOR", "ORDINALITY"): 5717 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5718 5719 constraints: t.List[exp.Expression] = [] 5720 5721 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5722 ("ALIAS", "MATERIALIZED") 5723 ): 5724 persisted = self._prev.text.upper() == "MATERIALIZED" 5725 constraint_kind = exp.ComputedColumnConstraint( 5726 this=self._parse_assignment(), 5727 persisted=persisted or self._match_text_seq("PERSISTED"), 5728 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5729 ) 5730 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5731 elif ( 5732 kind 5733 and self._match(TokenType.ALIAS, advance=False) 5734 and ( 5735 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 5736 or (self._next and self._next.token_type == TokenType.L_PAREN) 5737 ) 5738 ): 5739 self._advance() 5740 constraints.append( 5741 self.expression( 5742 exp.ColumnConstraint, 5743 kind=exp.TransformColumnConstraint(this=self._parse_disjunction()), 5744 ) 5745 ) 5746 5747 while True: 5748 constraint = self._parse_column_constraint() 5749 if not constraint: 5750 break 5751 constraints.append(constraint) 5752 5753 if not kind and not constraints: 5754 return this 5755 5756 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5757 5758 def _parse_auto_increment( 5759 self, 5760 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5761 start = None 5762 increment = None 5763 5764 if self._match(TokenType.L_PAREN, advance=False): 5765 args = self._parse_wrapped_csv(self._parse_bitwise) 5766 start = seq_get(args, 0) 5767 increment = seq_get(args, 1) 5768 elif self._match_text_seq("START"): 5769 start = self._parse_bitwise() 5770 self._match_text_seq("INCREMENT") 5771 increment = self._parse_bitwise() 5772 5773 if start and increment: 5774 return exp.GeneratedAsIdentityColumnConstraint( 5775 start=start, increment=increment, this=False 5776 ) 5777 5778 return exp.AutoIncrementColumnConstraint() 5779 5780 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5781 if not self._match_text_seq("REFRESH"): 5782 self._retreat(self._index - 1) 5783 return None 5784 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5785 5786 def _parse_compress(self) -> exp.CompressColumnConstraint: 5787 if self._match(TokenType.L_PAREN, advance=False): 5788 return self.expression( 5789 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5790 
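# [Editor's example] The ALIAS/MATERIALIZED branch in `_parse_column_def`
# above models ClickHouse computed columns. A sketch, assuming a stock
# install:
#
#   from sqlglot import exp, parse_one
#
#   ddl = parse_one("CREATE TABLE t (x Int64, y Int64 MATERIALIZED x + 1)", read="clickhouse")
#   comp = ddl.find(exp.ComputedColumnConstraint)
#   assert comp is not None and comp.args.get("persisted")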
) 5791 5792 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5793 5794 def _parse_generated_as_identity( 5795 self, 5796 ) -> ( 5797 exp.GeneratedAsIdentityColumnConstraint 5798 | exp.ComputedColumnConstraint 5799 | exp.GeneratedAsRowColumnConstraint 5800 ): 5801 if self._match_text_seq("BY", "DEFAULT"): 5802 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5803 this = self.expression( 5804 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5805 ) 5806 else: 5807 self._match_text_seq("ALWAYS") 5808 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5809 5810 self._match(TokenType.ALIAS) 5811 5812 if self._match_text_seq("ROW"): 5813 start = self._match_text_seq("START") 5814 if not start: 5815 self._match(TokenType.END) 5816 hidden = self._match_text_seq("HIDDEN") 5817 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5818 5819 identity = self._match_text_seq("IDENTITY") 5820 5821 if self._match(TokenType.L_PAREN): 5822 if self._match(TokenType.START_WITH): 5823 this.set("start", self._parse_bitwise()) 5824 if self._match_text_seq("INCREMENT", "BY"): 5825 this.set("increment", self._parse_bitwise()) 5826 if self._match_text_seq("MINVALUE"): 5827 this.set("minvalue", self._parse_bitwise()) 5828 if self._match_text_seq("MAXVALUE"): 5829 this.set("maxvalue", self._parse_bitwise()) 5830 5831 if self._match_text_seq("CYCLE"): 5832 this.set("cycle", True) 5833 elif self._match_text_seq("NO", "CYCLE"): 5834 this.set("cycle", False) 5835 5836 if not identity: 5837 this.set("expression", self._parse_range()) 5838 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5839 args = self._parse_csv(self._parse_bitwise) 5840 this.set("start", seq_get(args, 0)) 5841 this.set("increment", seq_get(args, 1)) 5842 5843 self._match_r_paren() 5844 5845 return this 5846 5847 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5848 self._match_text_seq("LENGTH") 5849 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5850 5851 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5852 if self._match_text_seq("NULL"): 5853 return self.expression(exp.NotNullColumnConstraint) 5854 if self._match_text_seq("CASESPECIFIC"): 5855 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5856 if self._match_text_seq("FOR", "REPLICATION"): 5857 return self.expression(exp.NotForReplicationColumnConstraint) 5858 5859 # Unconsume the `NOT` token 5860 self._retreat(self._index - 1) 5861 return None 5862 5863 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5864 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 5865 5866 procedure_option_follows = ( 5867 self._match(TokenType.WITH, advance=False) 5868 and self._next 5869 and self._next.text.upper() in self.PROCEDURE_OPTIONS 5870 ) 5871 5872 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 5873 return self.expression( 5874 exp.ColumnConstraint, 5875 this=this, 5876 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5877 ) 5878 5879 return this 5880 5881 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5882 if not self._match(TokenType.CONSTRAINT): 5883 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5884 5885 return self.expression( 5886 exp.Constraint, 5887 this=self._parse_id_var(), 5888 expressions=self._parse_unnamed_constraints(), 5889 ) 5890 5891 def 
_parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5892 constraints = [] 5893 while True: 5894 constraint = self._parse_unnamed_constraint() or self._parse_function() 5895 if not constraint: 5896 break 5897 constraints.append(constraint) 5898 5899 return constraints 5900 5901 def _parse_unnamed_constraint( 5902 self, constraints: t.Optional[t.Collection[str]] = None 5903 ) -> t.Optional[exp.Expression]: 5904 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5905 constraints or self.CONSTRAINT_PARSERS 5906 ): 5907 return None 5908 5909 constraint = self._prev.text.upper() 5910 if constraint not in self.CONSTRAINT_PARSERS: 5911 self.raise_error(f"No parser found for schema constraint {constraint}.") 5912 5913 return self.CONSTRAINT_PARSERS[constraint](self) 5914 5915 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5916 return self._parse_id_var(any_token=False) 5917 5918 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5919 self._match_text_seq("KEY") 5920 return self.expression( 5921 exp.UniqueColumnConstraint, 5922 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5923 this=self._parse_schema(self._parse_unique_key()), 5924 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5925 on_conflict=self._parse_on_conflict(), 5926 ) 5927 5928 def _parse_key_constraint_options(self) -> t.List[str]: 5929 options = [] 5930 while True: 5931 if not self._curr: 5932 break 5933 5934 if self._match(TokenType.ON): 5935 action = None 5936 on = self._advance_any() and self._prev.text 5937 5938 if self._match_text_seq("NO", "ACTION"): 5939 action = "NO ACTION" 5940 elif self._match_text_seq("CASCADE"): 5941 action = "CASCADE" 5942 elif self._match_text_seq("RESTRICT"): 5943 action = "RESTRICT" 5944 elif self._match_pair(TokenType.SET, TokenType.NULL): 5945 action = "SET NULL" 5946 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5947 action = "SET DEFAULT" 5948 else: 5949 self.raise_error("Invalid key constraint") 5950 5951 options.append(f"ON {on} {action}") 5952 else: 5953 var = self._parse_var_from_options( 5954 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5955 ) 5956 if not var: 5957 break 5958 options.append(var.name) 5959 5960 return options 5961 5962 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5963 if match and not self._match(TokenType.REFERENCES): 5964 return None 5965 5966 expressions = None 5967 this = self._parse_table(schema=True) 5968 options = self._parse_key_constraint_options() 5969 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5970 5971 def _parse_foreign_key(self) -> exp.ForeignKey: 5972 expressions = self._parse_wrapped_id_vars() 5973 reference = self._parse_references() 5974 options = {} 5975 5976 while self._match(TokenType.ON): 5977 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5978 self.raise_error("Expected DELETE or UPDATE") 5979 5980 kind = self._prev.text.lower() 5981 5982 if self._match_text_seq("NO", "ACTION"): 5983 action = "NO ACTION" 5984 elif self._match(TokenType.SET): 5985 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5986 action = "SET " + self._prev.text.upper() 5987 else: 5988 self._advance() 5989 action = self._prev.text.upper() 5990 5991 options[kind] = action 5992 5993 return self.expression( 5994 exp.ForeignKey, 5995 expressions=expressions, 5996 reference=reference, 5997 **options, # type: ignore 5998 ) 5999 6000 def _parse_primary_key_part(self) -> 
t.Optional[exp.Expression]: 6001 return self._parse_ordered() or self._parse_field() 6002 6003 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 6004 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 6005 self._retreat(self._index - 1) 6006 return None 6007 6008 id_vars = self._parse_wrapped_id_vars() 6009 return self.expression( 6010 exp.PeriodForSystemTimeConstraint, 6011 this=seq_get(id_vars, 0), 6012 expression=seq_get(id_vars, 1), 6013 ) 6014 6015 def _parse_primary_key( 6016 self, wrapped_optional: bool = False, in_props: bool = False 6017 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 6018 desc = ( 6019 self._match_set((TokenType.ASC, TokenType.DESC)) 6020 and self._prev.token_type == TokenType.DESC 6021 ) 6022 6023 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 6024 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 6025 6026 expressions = self._parse_wrapped_csv( 6027 self._parse_primary_key_part, optional=wrapped_optional 6028 ) 6029 options = self._parse_key_constraint_options() 6030 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 6031 6032 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6033 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 6034 6035 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6036 """ 6037 Parses a datetime literal in ODBC format. The literal is parsed into the corresponding 6038 expression type; for example `{d'yyyy-mm-dd'}` is parsed as a `Date`, exactly 6039 as `DATE('yyyy-mm-dd')` would be. 6040 6041 Reference: 6042 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6043 """ 6044 self._match(TokenType.VAR) 6045 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6046 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6047 if not self._match(TokenType.R_BRACE): 6048 self.raise_error("Expected }") 6049 return expression 6050 6051 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6052 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6053 return this 6054 6055 bracket_kind = self._prev.token_type 6056 if ( 6057 bracket_kind == TokenType.L_BRACE 6058 and self._curr 6059 and self._curr.token_type == TokenType.VAR 6060 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6061 ): 6062 return self._parse_odbc_datetime_literal() 6063 6064 expressions = self._parse_csv( 6065 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6066 ) 6067 6068 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6069 self.raise_error("Expected ]") 6070 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6071 self.raise_error("Expected }") 6072 6073 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6074 if bracket_kind == TokenType.L_BRACE: 6075 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 6076 elif not this: 6077 this = build_array_constructor( 6078 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6079 ) 6080 else: 6081 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6082 if constructor_type: 6083 return build_array_constructor( 6084 constructor_type, 6085 args=expressions, 6086 bracket_kind=bracket_kind, 6087 dialect=self.dialect, 6088
) 6089 6090 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 6091 this = self.expression(exp.Bracket, this=this, expressions=expressions) 6092 6093 self._add_comments(this) 6094 return self._parse_bracket(this) 6095 6096 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6097 if self._match(TokenType.COLON): 6098 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6099 return this 6100 6101 def _parse_case(self) -> t.Optional[exp.Expression]: 6102 ifs = [] 6103 default = None 6104 6105 comments = self._prev_comments 6106 expression = self._parse_assignment() 6107 6108 while self._match(TokenType.WHEN): 6109 this = self._parse_assignment() 6110 self._match(TokenType.THEN) 6111 then = self._parse_assignment() 6112 ifs.append(self.expression(exp.If, this=this, true=then)) 6113 6114 if self._match(TokenType.ELSE): 6115 default = self._parse_assignment() 6116 6117 if not self._match(TokenType.END): 6118 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6119 default = exp.column("interval") 6120 else: 6121 self.raise_error("Expected END after CASE", self._prev) 6122 6123 return self.expression( 6124 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6125 ) 6126 6127 def _parse_if(self) -> t.Optional[exp.Expression]: 6128 if self._match(TokenType.L_PAREN): 6129 args = self._parse_csv(self._parse_assignment) 6130 this = self.validate_expression(exp.If.from_arg_list(args), args) 6131 self._match_r_paren() 6132 else: 6133 index = self._index - 1 6134 6135 if self.NO_PAREN_IF_COMMANDS and index == 0: 6136 return self._parse_as_command(self._prev) 6137 6138 condition = self._parse_assignment() 6139 6140 if not condition: 6141 self._retreat(index) 6142 return None 6143 6144 self._match(TokenType.THEN) 6145 true = self._parse_assignment() 6146 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6147 self._match(TokenType.END) 6148 this = self.expression(exp.If, this=condition, true=true, false=false) 6149 6150 return this 6151 6152 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6153 if not self._match_text_seq("VALUE", "FOR"): 6154 self._retreat(self._index - 1) 6155 return None 6156 6157 return self.expression( 6158 exp.NextValueFor, 6159 this=self._parse_column(), 6160 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6161 ) 6162 6163 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6164 this = self._parse_function() or self._parse_var_or_string(upper=True) 6165 6166 if self._match(TokenType.FROM): 6167 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6168 6169 if not self._match(TokenType.COMMA): 6170 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6171 6172 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6173 6174 def _parse_gap_fill(self) -> exp.GapFill: 6175 self._match(TokenType.TABLE) 6176 this = self._parse_table() 6177 6178 self._match(TokenType.COMMA) 6179 args = [this, *self._parse_csv(self._parse_lambda)] 6180 6181 gap_fill = exp.GapFill.from_arg_list(args) 6182 return self.validate_expression(gap_fill, args) 6183 6184 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6185 this = self._parse_assignment() 6186 6187 if not self._match(TokenType.ALIAS): 6188 if self._match(TokenType.COMMA): 6189 return self.expression(exp.CastToStrType, this=this, 
to=self._parse_string()) 6190 6191 self.raise_error("Expected AS after CAST") 6192 6193 fmt = None 6194 to = self._parse_types() 6195 6196 default = self._match(TokenType.DEFAULT) 6197 if default: 6198 default = self._parse_bitwise() 6199 self._match_text_seq("ON", "CONVERSION", "ERROR") 6200 6201 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6202 fmt_string = self._parse_string() 6203 fmt = self._parse_at_time_zone(fmt_string) 6204 6205 if not to: 6206 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6207 if to.this in exp.DataType.TEMPORAL_TYPES: 6208 this = self.expression( 6209 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6210 this=this, 6211 format=exp.Literal.string( 6212 format_time( 6213 fmt_string.this if fmt_string else "", 6214 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6215 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6216 ) 6217 ), 6218 safe=safe, 6219 ) 6220 6221 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6222 this.set("zone", fmt.args["zone"]) 6223 return this 6224 elif not to: 6225 self.raise_error("Expected TYPE after CAST") 6226 elif isinstance(to, exp.Identifier): 6227 to = exp.DataType.build(to.name, udt=True) 6228 elif to.this == exp.DataType.Type.CHAR: 6229 if self._match(TokenType.CHARACTER_SET): 6230 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6231 6232 return self.expression( 6233 exp.Cast if strict else exp.TryCast, 6234 this=this, 6235 to=to, 6236 format=fmt, 6237 safe=safe, 6238 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6239 default=default, 6240 ) 6241 6242 def _parse_string_agg(self) -> exp.GroupConcat: 6243 if self._match(TokenType.DISTINCT): 6244 args: t.List[t.Optional[exp.Expression]] = [ 6245 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6246 ] 6247 if self._match(TokenType.COMMA): 6248 args.extend(self._parse_csv(self._parse_assignment)) 6249 else: 6250 args = self._parse_csv(self._parse_assignment) # type: ignore 6251 6252 if self._match_text_seq("ON", "OVERFLOW"): 6253 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6254 if self._match_text_seq("ERROR"): 6255 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6256 else: 6257 self._match_text_seq("TRUNCATE") 6258 on_overflow = self.expression( 6259 exp.OverflowTruncateBehavior, 6260 this=self._parse_string(), 6261 with_count=( 6262 self._match_text_seq("WITH", "COUNT") 6263 or not self._match_text_seq("WITHOUT", "COUNT") 6264 ), 6265 ) 6266 else: 6267 on_overflow = None 6268 6269 index = self._index 6270 if not self._match(TokenType.R_PAREN) and args: 6271 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6272 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6273 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6274 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6275 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6276 6277 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6278 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6279 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
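        # For instance (a hypothetical REPL sketch, assuming the public sqlglot API),
        # the Postgres form folds its ORDER BY into `this`:
        #
        #     >>> import sqlglot
        #     >>> expr = sqlglot.parse_one("STRING_AGG(x, ',' ORDER BY y)", read="postgres")
        #     >>> type(expr).__name__, type(expr.this).__name__
        #     ('GroupConcat', 'Order')
        #
        # which gives the node the same shape as MySQL's GROUP_CONCAT(x ORDER BY y SEPARATOR ',').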
6280 if not self._match_text_seq("WITHIN", "GROUP"): 6281 self._retreat(index) 6282 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6283 6284 # The corresponding match_r_paren will be called in parse_function (caller) 6285 self._match_l_paren() 6286 6287 return self.expression( 6288 exp.GroupConcat, 6289 this=self._parse_order(this=seq_get(args, 0)), 6290 separator=seq_get(args, 1), 6291 on_overflow=on_overflow, 6292 ) 6293 6294 def _parse_convert( 6295 self, strict: bool, safe: t.Optional[bool] = None 6296 ) -> t.Optional[exp.Expression]: 6297 this = self._parse_bitwise() 6298 6299 if self._match(TokenType.USING): 6300 to: t.Optional[exp.Expression] = self.expression( 6301 exp.CharacterSet, this=self._parse_var() 6302 ) 6303 elif self._match(TokenType.COMMA): 6304 to = self._parse_types() 6305 else: 6306 to = None 6307 6308 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 6309 6310 def _parse_xml_table(self) -> exp.XMLTable: 6311 namespaces = None 6312 passing = None 6313 columns = None 6314 6315 if self._match_text_seq("XMLNAMESPACES", "("): 6316 namespaces = self._parse_xml_namespace() 6317 self._match_text_seq(")", ",") 6318 6319 this = self._parse_string() 6320 6321 if self._match_text_seq("PASSING"): 6322 # The BY VALUE keywords are optional and are provided for semantic clarity 6323 self._match_text_seq("BY", "VALUE") 6324 passing = self._parse_csv(self._parse_column) 6325 6326 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6327 6328 if self._match_text_seq("COLUMNS"): 6329 columns = self._parse_csv(self._parse_field_def) 6330 6331 return self.expression( 6332 exp.XMLTable, 6333 this=this, 6334 namespaces=namespaces, 6335 passing=passing, 6336 columns=columns, 6337 by_ref=by_ref, 6338 ) 6339 6340 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6341 namespaces = [] 6342 6343 while True: 6344 if self._match(TokenType.DEFAULT): 6345 uri = self._parse_string() 6346 else: 6347 uri = self._parse_alias(self._parse_string()) 6348 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6349 if not self._match(TokenType.COMMA): 6350 break 6351 6352 return namespaces 6353 6354 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 6355 """ 6356 There are generally two variants of the DECODE function: 6357 6358 - DECODE(bin, charset) 6359 - DECODE(expression, search, result [, search, result] ... [, default]) 6360 6361 The second variant will always be parsed into a CASE expression. Note that NULL 6362 needs special treatment, since we need to explicitly check for it with `IS NULL`, 6363 instead of relying on pattern matching. 
6364 """ 6365 args = self._parse_csv(self._parse_assignment) 6366 6367 if len(args) < 3: 6368 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6369 6370 expression, *expressions = args 6371 if not expression: 6372 return None 6373 6374 ifs = [] 6375 for search, result in zip(expressions[::2], expressions[1::2]): 6376 if not search or not result: 6377 return None 6378 6379 if isinstance(search, exp.Literal): 6380 ifs.append( 6381 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 6382 ) 6383 elif isinstance(search, exp.Null): 6384 ifs.append( 6385 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 6386 ) 6387 else: 6388 cond = exp.or_( 6389 exp.EQ(this=expression.copy(), expression=search), 6390 exp.and_( 6391 exp.Is(this=expression.copy(), expression=exp.Null()), 6392 exp.Is(this=search.copy(), expression=exp.Null()), 6393 copy=False, 6394 ), 6395 copy=False, 6396 ) 6397 ifs.append(exp.If(this=cond, true=result)) 6398 6399 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6400 6401 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6402 self._match_text_seq("KEY") 6403 key = self._parse_column() 6404 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6405 self._match_text_seq("VALUE") 6406 value = self._parse_bitwise() 6407 6408 if not key and not value: 6409 return None 6410 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6411 6412 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6413 if not this or not self._match_text_seq("FORMAT", "JSON"): 6414 return this 6415 6416 return self.expression(exp.FormatJson, this=this) 6417 6418 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6419 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6420 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6421 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6422 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6423 else: 6424 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6425 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6426 6427 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6428 6429 if not empty and not error and not null: 6430 return None 6431 6432 return self.expression( 6433 exp.OnCondition, 6434 empty=empty, 6435 error=error, 6436 null=null, 6437 ) 6438 6439 def _parse_on_handling( 6440 self, on: str, *values: str 6441 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6442 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6443 for value in values: 6444 if self._match_text_seq(value, "ON", on): 6445 return f"{value} ON {on}" 6446 6447 index = self._index 6448 if self._match(TokenType.DEFAULT): 6449 default_value = self._parse_bitwise() 6450 if self._match_text_seq("ON", on): 6451 return default_value 6452 6453 self._retreat(index) 6454 6455 return None 6456 6457 @t.overload 6458 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6459 6460 @t.overload 6461 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
6462 6463 def _parse_json_object(self, agg=False): 6464 star = self._parse_star() 6465 expressions = ( 6466 [star] 6467 if star 6468 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6469 ) 6470 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6471 6472 unique_keys = None 6473 if self._match_text_seq("WITH", "UNIQUE"): 6474 unique_keys = True 6475 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6476 unique_keys = False 6477 6478 self._match_text_seq("KEYS") 6479 6480 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6481 self._parse_type() 6482 ) 6483 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6484 6485 return self.expression( 6486 exp.JSONObjectAgg if agg else exp.JSONObject, 6487 expressions=expressions, 6488 null_handling=null_handling, 6489 unique_keys=unique_keys, 6490 return_type=return_type, 6491 encoding=encoding, 6492 ) 6493 6494 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6495 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6496 if not self._match_text_seq("NESTED"): 6497 this = self._parse_id_var() 6498 kind = self._parse_types(allow_identifiers=False) 6499 nested = None 6500 else: 6501 this = None 6502 kind = None 6503 nested = True 6504 6505 path = self._match_text_seq("PATH") and self._parse_string() 6506 nested_schema = nested and self._parse_json_schema() 6507 6508 return self.expression( 6509 exp.JSONColumnDef, 6510 this=this, 6511 kind=kind, 6512 path=path, 6513 nested_schema=nested_schema, 6514 ) 6515 6516 def _parse_json_schema(self) -> exp.JSONSchema: 6517 self._match_text_seq("COLUMNS") 6518 return self.expression( 6519 exp.JSONSchema, 6520 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6521 ) 6522 6523 def _parse_json_table(self) -> exp.JSONTable: 6524 this = self._parse_format_json(self._parse_bitwise()) 6525 path = self._match(TokenType.COMMA) and self._parse_string() 6526 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6527 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6528 schema = self._parse_json_schema() 6529 6530 return exp.JSONTable( 6531 this=this, 6532 schema=schema, 6533 path=path, 6534 error_handling=error_handling, 6535 empty_handling=empty_handling, 6536 ) 6537 6538 def _parse_match_against(self) -> exp.MatchAgainst: 6539 expressions = self._parse_csv(self._parse_column) 6540 6541 self._match_text_seq(")", "AGAINST", "(") 6542 6543 this = self._parse_string() 6544 6545 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6546 modifier = "IN NATURAL LANGUAGE MODE" 6547 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6548 modifier = f"{modifier} WITH QUERY EXPANSION" 6549 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6550 modifier = "IN BOOLEAN MODE" 6551 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6552 modifier = "WITH QUERY EXPANSION" 6553 else: 6554 modifier = None 6555 6556 return self.expression( 6557 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6558 ) 6559 6560 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6561 def _parse_open_json(self) -> exp.OpenJSON: 6562 this = self._parse_bitwise() 6563 path = self._match(TokenType.COMMA) and self._parse_string() 6564 6565 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6566 this = self._parse_field(any_token=True) 6567 kind = self._parse_types() 6568 path = 
self._parse_string() 6569 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6570 6571 return self.expression( 6572 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6573 ) 6574 6575 expressions = None 6576 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6577 self._match_l_paren() 6578 expressions = self._parse_csv(_parse_open_json_column_def) 6579 6580 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6581 6582 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6583 args = self._parse_csv(self._parse_bitwise) 6584 6585 if self._match(TokenType.IN): 6586 return self.expression( 6587 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6588 ) 6589 6590 if haystack_first: 6591 haystack = seq_get(args, 0) 6592 needle = seq_get(args, 1) 6593 else: 6594 haystack = seq_get(args, 1) 6595 needle = seq_get(args, 0) 6596 6597 return self.expression( 6598 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6599 ) 6600 6601 def _parse_predict(self) -> exp.Predict: 6602 self._match_text_seq("MODEL") 6603 this = self._parse_table() 6604 6605 self._match(TokenType.COMMA) 6606 self._match_text_seq("TABLE") 6607 6608 return self.expression( 6609 exp.Predict, 6610 this=this, 6611 expression=self._parse_table(), 6612 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6613 ) 6614 6615 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6616 args = self._parse_csv(self._parse_table) 6617 return exp.JoinHint(this=func_name.upper(), expressions=args) 6618 6619 def _parse_substring(self) -> exp.Substring: 6620 # Postgres supports the form: substring(string [from int] [for int]) 6621 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6622 6623 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6624 6625 if self._match(TokenType.FROM): 6626 args.append(self._parse_bitwise()) 6627 if self._match(TokenType.FOR): 6628 if len(args) == 1: 6629 args.append(exp.Literal.number(1)) 6630 args.append(self._parse_bitwise()) 6631 6632 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6633 6634 def _parse_trim(self) -> exp.Trim: 6635 # https://www.w3resource.com/sql/character-functions/trim.php 6636 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6637 6638 position = None 6639 collation = None 6640 expression = None 6641 6642 if self._match_texts(self.TRIM_TYPES): 6643 position = self._prev.text.upper() 6644 6645 this = self._parse_bitwise() 6646 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6647 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6648 expression = self._parse_bitwise() 6649 6650 if invert_order: 6651 this, expression = expression, this 6652 6653 if self._match(TokenType.COLLATE): 6654 collation = self._parse_bitwise() 6655 6656 return self.expression( 6657 exp.Trim, this=this, position=position, expression=expression, collation=collation 6658 ) 6659 6660 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6661 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6662 6663 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6664 return self._parse_window(self._parse_id_var(), alias=True) 6665 6666 def _parse_respect_or_ignore_nulls( 6667 self, this: t.Optional[exp.Expression] 6668 ) -> t.Optional[exp.Expression]: 6669 if self._match_text_seq("IGNORE", "NULLS"): 
6670 return self.expression(exp.IgnoreNulls, this=this) 6671 if self._match_text_seq("RESPECT", "NULLS"): 6672 return self.expression(exp.RespectNulls, this=this) 6673 return this 6674 6675 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6676 if self._match(TokenType.HAVING): 6677 self._match_texts(("MAX", "MIN")) 6678 max = self._prev.text.upper() != "MIN" 6679 return self.expression( 6680 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6681 ) 6682 6683 return this 6684 6685 def _parse_window( 6686 self, this: t.Optional[exp.Expression], alias: bool = False 6687 ) -> t.Optional[exp.Expression]: 6688 func = this 6689 comments = func.comments if isinstance(func, exp.Expression) else None 6690 6691 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6692 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6693 if self._match_text_seq("WITHIN", "GROUP"): 6694 order = self._parse_wrapped(self._parse_order) 6695 this = self.expression(exp.WithinGroup, this=this, expression=order) 6696 6697 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6698 self._match(TokenType.WHERE) 6699 this = self.expression( 6700 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6701 ) 6702 self._match_r_paren() 6703 6704 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6705 # Some dialects choose to implement and some do not. 6706 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6707 6708 # There is some code above in _parse_lambda that handles 6709 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6710 6711 # The below changes handle 6712 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6713 6714 # Oracle allows both formats 6715 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6716 # and Snowflake chose to do the same for familiarity 6717 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6718 if isinstance(this, exp.AggFunc): 6719 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6720 6721 if ignore_respect and ignore_respect is not this: 6722 ignore_respect.replace(ignore_respect.this) 6723 this = self.expression(ignore_respect.__class__, this=this) 6724 6725 this = self._parse_respect_or_ignore_nulls(this) 6726 6727 # bigquery select from window x AS (partition by ...) 
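        # A hypothetical REPL sketch of this normalization (assuming the public
        # sqlglot API): both placements should yield the same tree, with the
        # IgnoreNulls wrapper hoisted outside the aggregate:
        #
        #     >>> import sqlglot
        #     >>> a = sqlglot.parse_one("SELECT LAST_VALUE(x IGNORE NULLS) OVER (ORDER BY y) FROM t")
        #     >>> b = sqlglot.parse_one("SELECT LAST_VALUE(x) IGNORE NULLS OVER (ORDER BY y) FROM t")
        #     >>> a == b
        #     True
        #
        # The `alias` branch below handles the BigQuery named-window form noted above.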
6728 if alias: 6729 over = None 6730 self._match(TokenType.ALIAS) 6731 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6732 return this 6733 else: 6734 over = self._prev.text.upper() 6735 6736 if comments and isinstance(func, exp.Expression): 6737 func.pop_comments() 6738 6739 if not self._match(TokenType.L_PAREN): 6740 return self.expression( 6741 exp.Window, 6742 comments=comments, 6743 this=this, 6744 alias=self._parse_id_var(False), 6745 over=over, 6746 ) 6747 6748 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6749 6750 first = self._match(TokenType.FIRST) 6751 if self._match_text_seq("LAST"): 6752 first = False 6753 6754 partition, order = self._parse_partition_and_order() 6755 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6756 6757 if kind: 6758 self._match(TokenType.BETWEEN) 6759 start = self._parse_window_spec() 6760 self._match(TokenType.AND) 6761 end = self._parse_window_spec() 6762 6763 spec = self.expression( 6764 exp.WindowSpec, 6765 kind=kind, 6766 start=start["value"], 6767 start_side=start["side"], 6768 end=end["value"], 6769 end_side=end["side"], 6770 ) 6771 else: 6772 spec = None 6773 6774 self._match_r_paren() 6775 6776 window = self.expression( 6777 exp.Window, 6778 comments=comments, 6779 this=this, 6780 partition_by=partition, 6781 order=order, 6782 spec=spec, 6783 alias=window_alias, 6784 over=over, 6785 first=first, 6786 ) 6787 6788 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6789 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6790 return self._parse_window(window, alias=alias) 6791 6792 return window 6793 6794 def _parse_partition_and_order( 6795 self, 6796 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6797 return self._parse_partition_by(), self._parse_order() 6798 6799 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6800 self._match(TokenType.BETWEEN) 6801 6802 return { 6803 "value": ( 6804 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6805 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6806 or self._parse_bitwise() 6807 ), 6808 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6809 } 6810 6811 def _parse_alias( 6812 self, this: t.Optional[exp.Expression], explicit: bool = False 6813 ) -> t.Optional[exp.Expression]: 6814 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 6815 # so this section tries to parse the clause version and if it fails, it treats the token 6816 # as an identifier (alias) 6817 if self._can_parse_limit_or_offset(): 6818 return this 6819 6820 any_token = self._match(TokenType.ALIAS) 6821 comments = self._prev_comments or [] 6822 6823 if explicit and not any_token: 6824 return this 6825 6826 if self._match(TokenType.L_PAREN): 6827 aliases = self.expression( 6828 exp.Aliases, 6829 comments=comments, 6830 this=this, 6831 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6832 ) 6833 self._match_r_paren(aliases) 6834 return aliases 6835 6836 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6837 self.STRING_ALIASES and self._parse_string_as_identifier() 6838 ) 6839 6840 if alias: 6841 comments.extend(alias.pop_comments()) 6842 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6843 column = this.this 6844 6845 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6846 if not this.comments and column and 
column.comments: 6847 this.comments = column.pop_comments() 6848 6849 return this 6850 6851 def _parse_id_var( 6852 self, 6853 any_token: bool = True, 6854 tokens: t.Optional[t.Collection[TokenType]] = None, 6855 ) -> t.Optional[exp.Expression]: 6856 expression = self._parse_identifier() 6857 if not expression and ( 6858 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6859 ): 6860 quoted = self._prev.token_type == TokenType.STRING 6861 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6862 6863 return expression 6864 6865 def _parse_string(self) -> t.Optional[exp.Expression]: 6866 if self._match_set(self.STRING_PARSERS): 6867 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6868 return self._parse_placeholder() 6869 6870 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6871 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6872 6873 def _parse_number(self) -> t.Optional[exp.Expression]: 6874 if self._match_set(self.NUMERIC_PARSERS): 6875 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6876 return self._parse_placeholder() 6877 6878 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6879 if self._match(TokenType.IDENTIFIER): 6880 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6881 return self._parse_placeholder() 6882 6883 def _parse_var( 6884 self, 6885 any_token: bool = False, 6886 tokens: t.Optional[t.Collection[TokenType]] = None, 6887 upper: bool = False, 6888 ) -> t.Optional[exp.Expression]: 6889 if ( 6890 (any_token and self._advance_any()) 6891 or self._match(TokenType.VAR) 6892 or (self._match_set(tokens) if tokens else False) 6893 ): 6894 return self.expression( 6895 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6896 ) 6897 return self._parse_placeholder() 6898 6899 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6900 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6901 self._advance() 6902 return self._prev 6903 return None 6904 6905 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6906 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6907 6908 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6909 return self._parse_primary() or self._parse_var(any_token=True) 6910 6911 def _parse_null(self) -> t.Optional[exp.Expression]: 6912 if self._match_set(self.NULL_TOKENS): 6913 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6914 return self._parse_placeholder() 6915 6916 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6917 if self._match(TokenType.TRUE): 6918 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6919 if self._match(TokenType.FALSE): 6920 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6921 return self._parse_placeholder() 6922 6923 def _parse_star(self) -> t.Optional[exp.Expression]: 6924 if self._match(TokenType.STAR): 6925 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6926 return self._parse_placeholder() 6927 6928 def _parse_parameter(self) -> exp.Parameter: 6929 this = self._parse_identifier() or self._parse_primary_or_var() 6930 return self.expression(exp.Parameter, this=this) 6931 6932 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6933 if self._match_set(self.PLACEHOLDER_PARSERS): 6934 placeholder = 
self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6935 if placeholder: 6936 return placeholder 6937 self._advance(-1) 6938 return None 6939 6940 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6941 if not self._match_texts(keywords): 6942 return None 6943 if self._match(TokenType.L_PAREN, advance=False): 6944 return self._parse_wrapped_csv(self._parse_expression) 6945 6946 expression = self._parse_expression() 6947 return [expression] if expression else None 6948 6949 def _parse_csv( 6950 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6951 ) -> t.List[exp.Expression]: 6952 parse_result = parse_method() 6953 items = [parse_result] if parse_result is not None else [] 6954 6955 while self._match(sep): 6956 self._add_comments(parse_result) 6957 parse_result = parse_method() 6958 if parse_result is not None: 6959 items.append(parse_result) 6960 6961 return items 6962 6963 def _parse_tokens( 6964 self, parse_method: t.Callable, expressions: t.Dict 6965 ) -> t.Optional[exp.Expression]: 6966 this = parse_method() 6967 6968 while self._match_set(expressions): 6969 this = self.expression( 6970 expressions[self._prev.token_type], 6971 this=this, 6972 comments=self._prev_comments, 6973 expression=parse_method(), 6974 ) 6975 6976 return this 6977 6978 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6979 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6980 6981 def _parse_wrapped_csv( 6982 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6983 ) -> t.List[exp.Expression]: 6984 return self._parse_wrapped( 6985 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6986 ) 6987 6988 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6989 wrapped = self._match(TokenType.L_PAREN) 6990 if not wrapped and not optional: 6991 self.raise_error("Expecting (") 6992 parse_result = parse_method() 6993 if wrapped: 6994 self._match_r_paren() 6995 return parse_result 6996 6997 def _parse_expressions(self) -> t.List[exp.Expression]: 6998 return self._parse_csv(self._parse_expression) 6999 7000 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7001 return self._parse_select() or self._parse_set_operations( 7002 self._parse_alias(self._parse_assignment(), explicit=True) 7003 if alias 7004 else self._parse_assignment() 7005 ) 7006 7007 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7008 return self._parse_query_modifiers( 7009 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7010 ) 7011 7012 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7013 this = None 7014 if self._match_texts(self.TRANSACTION_KIND): 7015 this = self._prev.text 7016 7017 self._match_texts(("TRANSACTION", "WORK")) 7018 7019 modes = [] 7020 while True: 7021 mode = [] 7022 while self._match(TokenType.VAR): 7023 mode.append(self._prev.text) 7024 7025 if mode: 7026 modes.append(" ".join(mode)) 7027 if not self._match(TokenType.COMMA): 7028 break 7029 7030 return self.expression(exp.Transaction, this=this, modes=modes) 7031 7032 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7033 chain = None 7034 savepoint = None 7035 is_rollback = self._prev.token_type == TokenType.ROLLBACK 7036 7037 self._match_texts(("TRANSACTION", "WORK")) 7038 7039 if self._match_text_seq("TO"): 7040 self._match_text_seq("SAVEPOINT") 7041 savepoint = self._parse_id_var() 
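        # Illustration of the savepoint handling just above (a hypothetical
        # session, assuming the public sqlglot API):
        #
        #     >>> import sqlglot
        #     >>> stmt = sqlglot.parse_one("ROLLBACK TO SAVEPOINT sp1")
        #     >>> isinstance(stmt, sqlglot.exp.Rollback), stmt.args["savepoint"].name
        #     (True, 'sp1')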
7042 7043 if self._match(TokenType.AND): 7044 chain = not self._match_text_seq("NO") 7045 self._match_text_seq("CHAIN") 7046 7047 if is_rollback: 7048 return self.expression(exp.Rollback, savepoint=savepoint) 7049 7050 return self.expression(exp.Commit, chain=chain) 7051 7052 def _parse_refresh(self) -> exp.Refresh: 7053 self._match(TokenType.TABLE) 7054 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7055 7056 def _parse_add_column(self) -> t.Optional[exp.Expression]: 7057 if not self._match_text_seq("ADD"): 7058 return None 7059 7060 self._match(TokenType.COLUMN) 7061 exists_column = self._parse_exists(not_=True) 7062 expression = self._parse_field_def() 7063 7064 if expression: 7065 expression.set("exists", exists_column) 7066 7067 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7068 if self._match_texts(("FIRST", "AFTER")): 7069 position = self._prev.text 7070 column_position = self.expression( 7071 exp.ColumnPosition, this=self._parse_column(), position=position 7072 ) 7073 expression.set("position", column_position) 7074 7075 return expression 7076 7077 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7078 drop = self._match(TokenType.DROP) and self._parse_drop() 7079 if drop and not isinstance(drop, exp.Command): 7080 drop.set("kind", drop.args.get("kind", "COLUMN")) 7081 return drop 7082 7083 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7084 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7085 return self.expression( 7086 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7087 ) 7088 7089 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7090 index = self._index - 1 7091 7092 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7093 return self._parse_csv( 7094 lambda: self.expression( 7095 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7096 ) 7097 ) 7098 7099 self._retreat(index) 7100 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 7101 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 7102 7103 if self._match_text_seq("ADD", "COLUMNS"): 7104 schema = self._parse_schema() 7105 if schema: 7106 return [schema] 7107 return [] 7108 7109 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 7110 7111 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7112 if self._match_texts(self.ALTER_ALTER_PARSERS): 7113 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7114 7115 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7116 # keyword after ALTER we default to parsing this statement 7117 self._match(TokenType.COLUMN) 7118 column = self._parse_field(any_token=True) 7119 7120 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7121 return self.expression(exp.AlterColumn, this=column, drop=True) 7122 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7123 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7124 if self._match(TokenType.COMMENT): 7125 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7126 if self._match_text_seq("DROP", "NOT", "NULL"): 7127 return self.expression( 7128 exp.AlterColumn, 7129 this=column, 7130 drop=True, 7131 allow_null=True, 7132 ) 7133 if self._match_text_seq("SET", "NOT", "NULL"): 7134 return self.expression( 7135 
exp.AlterColumn, 7136 this=column, 7137 allow_null=False, 7138 ) 7139 7140 if self._match_text_seq("SET", "VISIBLE"): 7141 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7142 if self._match_text_seq("SET", "INVISIBLE"): 7143 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7144 7145 self._match_text_seq("SET", "DATA") 7146 self._match_text_seq("TYPE") 7147 return self.expression( 7148 exp.AlterColumn, 7149 this=column, 7150 dtype=self._parse_types(), 7151 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7152 using=self._match(TokenType.USING) and self._parse_assignment(), 7153 ) 7154 7155 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7156 if self._match_texts(("ALL", "EVEN", "AUTO")): 7157 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7158 7159 self._match_text_seq("KEY", "DISTKEY") 7160 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7161 7162 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7163 if compound: 7164 self._match_text_seq("SORTKEY") 7165 7166 if self._match(TokenType.L_PAREN, advance=False): 7167 return self.expression( 7168 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7169 ) 7170 7171 self._match_texts(("AUTO", "NONE")) 7172 return self.expression( 7173 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7174 ) 7175 7176 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7177 index = self._index - 1 7178 7179 partition_exists = self._parse_exists() 7180 if self._match(TokenType.PARTITION, advance=False): 7181 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7182 7183 self._retreat(index) 7184 return self._parse_csv(self._parse_drop_column) 7185 7186 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7187 if self._match(TokenType.COLUMN): 7188 exists = self._parse_exists() 7189 old_column = self._parse_column() 7190 to = self._match_text_seq("TO") 7191 new_column = self._parse_column() 7192 7193 if old_column is None or to is None or new_column is None: 7194 return None 7195 7196 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7197 7198 self._match_text_seq("TO") 7199 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7200 7201 def _parse_alter_table_set(self) -> exp.AlterSet: 7202 alter_set = self.expression(exp.AlterSet) 7203 7204 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7205 "TABLE", "PROPERTIES" 7206 ): 7207 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7208 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7209 alter_set.set("expressions", [self._parse_assignment()]) 7210 elif self._match_texts(("LOGGED", "UNLOGGED")): 7211 alter_set.set("option", exp.var(self._prev.text.upper())) 7212 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7213 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7214 elif self._match_text_seq("LOCATION"): 7215 alter_set.set("location", self._parse_field()) 7216 elif self._match_text_seq("ACCESS", "METHOD"): 7217 alter_set.set("access_method", self._parse_field()) 7218 elif self._match_text_seq("TABLESPACE"): 7219 alter_set.set("tablespace", self._parse_field()) 7220 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7221 
alter_set.set("file_format", [self._parse_field()]) 7222 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7223 alter_set.set("file_format", self._parse_wrapped_options()) 7224 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7225 alter_set.set("copy_options", self._parse_wrapped_options()) 7226 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7227 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7228 else: 7229 if self._match_text_seq("SERDE"): 7230 alter_set.set("serde", self._parse_field()) 7231 7232 alter_set.set("expressions", [self._parse_properties()]) 7233 7234 return alter_set 7235 7236 def _parse_alter(self) -> exp.Alter | exp.Command: 7237 start = self._prev 7238 7239 alter_token = self._match_set(self.ALTERABLES) and self._prev 7240 if not alter_token: 7241 return self._parse_as_command(start) 7242 7243 exists = self._parse_exists() 7244 only = self._match_text_seq("ONLY") 7245 this = self._parse_table(schema=True) 7246 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7247 7248 if self._next: 7249 self._advance() 7250 7251 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7252 if parser: 7253 actions = ensure_list(parser(self)) 7254 not_valid = self._match_text_seq("NOT", "VALID") 7255 options = self._parse_csv(self._parse_property) 7256 7257 if not self._curr and actions: 7258 return self.expression( 7259 exp.Alter, 7260 this=this, 7261 kind=alter_token.text.upper(), 7262 exists=exists, 7263 actions=actions, 7264 only=only, 7265 options=options, 7266 cluster=cluster, 7267 not_valid=not_valid, 7268 ) 7269 7270 return self._parse_as_command(start) 7271 7272 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7273 start = self._prev 7274 # https://duckdb.org/docs/sql/statements/analyze 7275 if not self._curr: 7276 return self.expression(exp.Analyze) 7277 7278 options = [] 7279 while self._match_texts(self.ANALYZE_STYLES): 7280 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7281 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7282 else: 7283 options.append(self._prev.text.upper()) 7284 7285 this: t.Optional[exp.Expression] = None 7286 inner_expression: t.Optional[exp.Expression] = None 7287 7288 kind = self._curr and self._curr.text.upper() 7289 7290 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7291 this = self._parse_table_parts() 7292 elif self._match_text_seq("TABLES"): 7293 if self._match_set((TokenType.FROM, TokenType.IN)): 7294 kind = f"{kind} {self._prev.text.upper()}" 7295 this = self._parse_table(schema=True, is_db_reference=True) 7296 elif self._match_text_seq("DATABASE"): 7297 this = self._parse_table(schema=True, is_db_reference=True) 7298 elif self._match_text_seq("CLUSTER"): 7299 this = self._parse_table() 7300 # Try matching inner expr keywords before fallback to parse table. 
        elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            kind = None
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)
        else:
            # Empty kind https://prestodb.io/docs/current/sql/analyze.html
            kind = None
            this = self._parse_table_parts()

        partition = self._try_parse(self._parse_partition)
        if not partition and self._match_texts(self.PARTITION_KEYWORDS):
            return self._parse_as_command(start)

        # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
        if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq(
            "WITH", "ASYNC", "MODE"
        ):
            mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE"
        else:
            mode = None

        if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)

        properties = self._parse_properties()
        return self.expression(
            exp.Analyze,
            kind=kind,
            this=this,
            mode=mode,
            partition=partition,
            properties=properties,
            expression=inner_expression,
            options=options,
        )

    # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html
    def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics:
        this = None
        kind = self._prev.text.upper()
        option = self._prev.text.upper() if self._match_text_seq("DELTA") else None
        expressions = []

        if not self._match_text_seq("STATISTICS"):
            self.raise_error("Expecting token STATISTICS")

        if self._match_text_seq("NOSCAN"):
            this = "NOSCAN"
        elif self._match(TokenType.FOR):
            if self._match_text_seq("ALL", "COLUMNS"):
                this = "FOR ALL COLUMNS"
            if self._match_texts("COLUMNS"):
                this = "FOR COLUMNS"
                expressions = self._parse_csv(self._parse_column_reference)
        elif self._match_text_seq("SAMPLE"):
            sample = self._parse_number()
            expressions = [
                self.expression(
                    exp.AnalyzeSample,
                    sample=sample,
                    kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None,
                )
            ]

        return self.expression(
            exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions
        )

    # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html
    def _parse_analyze_validate(self) -> exp.AnalyzeValidate:
        kind = None
        this = None
        expression: t.Optional[exp.Expression] = None
        if self._match_text_seq("REF", "UPDATE"):
            kind = "REF"
            this = "UPDATE"
            if self._match_text_seq("SET", "DANGLING", "TO", "NULL"):
                this = "UPDATE SET DANGLING TO NULL"
        elif self._match_text_seq("STRUCTURE"):
            kind = "STRUCTURE"
            if self._match_text_seq("CASCADE", "FAST"):
                this = "CASCADE FAST"
            elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts(
                ("ONLINE", "OFFLINE")
            ):
                this = f"CASCADE COMPLETE {self._prev.text.upper()}"
                expression = self._parse_into()

        return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression)

    def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]:
        this = self._prev.text.upper()
        if self._match_text_seq("COLUMNS"):
            return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}")
        return None

    def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]:
        kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None
        if self._match_text_seq("STATISTICS"):
            return self.expression(exp.AnalyzeDelete, kind=kind)
        return None

    def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]:
        if self._match_text_seq("CHAINED", "ROWS"):
            return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into())
        return None

    # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html
    def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram:
        this = self._prev.text.upper()
        expression: t.Optional[exp.Expression] = None
        expressions = []
        update_options = None

        if self._match_text_seq("HISTOGRAM", "ON"):
            expressions = self._parse_csv(self._parse_column_reference)
            with_expressions = []
            while self._match(TokenType.WITH):
                # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
                if self._match_texts(("SYNC", "ASYNC")):
                    if self._match_text_seq("MODE", advance=False):
                        with_expressions.append(f"{self._prev.text.upper()} MODE")
                        self._advance()
                else:
                    buckets = self._parse_number()
                    if self._match_text_seq("BUCKETS"):
                        with_expressions.append(f"{buckets} BUCKETS")
            if with_expressions:
                expression = self.expression(exp.AnalyzeWith, expressions=with_expressions)

            if self._match_texts(("MANUAL", "AUTO")) and self._match(
                TokenType.UPDATE, advance=False
            ):
                update_options = self._prev.text.upper()
                self._advance()
            elif self._match_text_seq("USING", "DATA"):
                expression = self.expression(exp.UsingData, this=self._parse_string())

        return self.expression(
            exp.AnalyzeHistogram,
            this=this,
            expressions=expressions,
            expression=expression,
            update_options=update_options,
        )

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            whens=self._parse_when_matched(),
            returning=self._parse_returning(),
        )

    def _parse_when_matched(self) -> exp.Whens:
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                this = self._parse_star()
                if this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = self._parse_var_from_options(self.CONFLICT_ACTIONS)

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return self.expression(exp.Whens, expressions=whens)

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        if isinstance(right, (exp.Column, exp.Identifier)):
            right = exp.var(right.name)

        this = self.expression(exp.EQ, this=left, expression=right)
        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()
                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if (
            self._curr
            and self._curr.token_type != TokenType.STRING
            and self._curr.text.upper() in texts
        ):
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if (
                self._curr
                and self._curr.token_type != TokenType.STRING
                and self._curr.text.upper() == text
            ):
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)

    def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]:
        self._match(TokenType.EQ)
        self._match(TokenType.L_PAREN)

        opts: t.List[t.Optional[exp.Expression]] = []
        while self._curr and not self._match(TokenType.R_PAREN):
            if self._match_text_seq("FORMAT_NAME", "="):
                # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL
                prop = self.expression(
                    exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_table_parts()
                )
                opts.append(prop)
            else:
                opts.append(self._parse_property())

            self._match(TokenType.COMMA)

        return opts

    def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
        sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

        options = []
        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
            option = self._parse_var(any_token=True)
            prev = self._prev.text.upper()

            # Different dialects might separate options and values by white space, "=" and "AS"
            self._match(TokenType.EQ)
            self._match(TokenType.ALIAS)

            param = self.expression(exp.CopyParameter, this=option)

            if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
                TokenType.L_PAREN, advance=False
            ):
                # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
                param.set("expressions", self._parse_wrapped_options())
            elif prev == "FILE_FORMAT":
                # T-SQL's external file format case
                param.set("expression", self._parse_field())
            else:
                param.set("expression", self._parse_unquoted_field())

            options.append(param)
            self._match(sep)

        return options

    def _parse_credentials(self) -> t.Optional[exp.Credentials]:
        expr = self.expression(exp.Credentials)

        if self._match_text_seq("STORAGE_INTEGRATION", "="):
            expr.set("storage", self._parse_field())
        if self._match_text_seq("CREDENTIALS"):
            # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
            creds = (
                self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
            )
            expr.set("credentials", creds)
        if self._match_text_seq("ENCRYPTION"):
            expr.set("encryption", self._parse_wrapped_options())
        if self._match_text_seq("IAM_ROLE"):
            expr.set("iam_role", self._parse_field())
        if self._match_text_seq("REGION"):
            expr.set("region", self._parse_field())

        return expr

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_copy(self) -> exp.Copy | exp.Command:
        start = self._prev

        self._match(TokenType.INTO)

        this = (
            self._parse_select(nested=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table(schema=True)
        )

        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_file_location)
        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )

    def _parse_normalize(self) -> exp.Normalize:
        return self.expression(
            exp.Normalize,
            this=self._parse_bitwise(),
            form=self._match(TokenType.COMMA) and self._parse_var(),
        )

    def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor:
        args = self._parse_csv(lambda: self._parse_lambda())

        this = seq_get(args, 0)
        decimals = seq_get(args, 1)

        return expr_type(
            this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var()
        )

    def _parse_star_ops(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COLUMNS", "(", advance=False):
            this = self._parse_function()
            if isinstance(this, exp.Columns):
                this.set("unpack", True)
            return this

        return self.expression(
            exp.Star,
            **{  # type: ignore
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        )

    def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]:
        privilege_parts = []

        # Keep consuming consecutive keywords until comma (end of this privilege) or ON
        # (end of privilege list) or L_PAREN (start of column list) are met
        while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False):
            privilege_parts.append(self._curr.text.upper())
            self._advance()

        this = exp.var(" ".join(privilege_parts))
        expressions = (
            self._parse_wrapped_csv(self._parse_column)
            if self._match(TokenType.L_PAREN, advance=False)
            else None
        )

        return self.expression(exp.GrantPrivilege, this=this, expressions=expressions)

    def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]:
        kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper()
        principal = self._parse_id_var()

        if not principal:
            return None

        return self.expression(exp.GrantPrincipal, this=principal, kind=kind)

    def _parse_grant(self) -> exp.Grant | exp.Command:
        start = self._prev

        privileges = self._parse_csv(self._parse_grant_privilege)

        self._match(TokenType.ON)
        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()

        # Attempt to parse the securable e.g. MySQL allows names
        # such as "foo.*", "*.*" which are not easily parseable yet
        securable = self._try_parse(self._parse_table_parts)

        if not securable or not self._match_text_seq("TO"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        grant_option = self._match_text_seq("WITH", "GRANT", "OPTION")

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Grant,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
        )

    def _parse_overlay(self) -> exp.Overlay:
        return self.expression(
            exp.Overlay,
            **{  # type: ignore
                "this": self._parse_bitwise(),
                "expression": self._match_text_seq("PLACING") and self._parse_bitwise(),
                "from": self._match_text_seq("FROM") and self._parse_bitwise(),
                "for": self._match_text_seq("FOR") and self._parse_bitwise(),
            },
        )
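Taken together, _parse_merge and _parse_when_matched above produce an exp.Merge node whose WHEN branches are collected under a single exp.Whens child, one exp.When per branch. A minimal sketch of inspecting that shape through the public API, assuming a sqlglot version that stores the branches under the "whens" arg as in the source above (the statement text is illustrative):

import sqlglot
from sqlglot import exp

ast = sqlglot.parse_one(
    "MERGE INTO t USING s ON t.id = s.id "
    "WHEN MATCHED THEN UPDATE SET t.v = s.v "
    "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)"
)
assert isinstance(ast, exp.Merge)

# Each branch is an exp.When; "then" holds exp.Update, exp.Insert, or a DELETE var.
for when in ast.args["whens"].expressions:
    print(when.args.get("matched"), type(when.args["then"]).__name__)
# True Update
# False Insert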
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
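A short sketch of wiring these options together with the plain Tokenizer (most callers reach the parser through sqlglot.parse or sqlglot.parse_one instead of instantiating it directly):

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

# WARN records errors and logs them through the "sqlglot" logger
# instead of raising on the first problem.
parser = Parser(error_level=ErrorLevel.WARN, error_message_context=50, max_errors=5)
tokens = Tokenizer().tokenize("SELECT 1")
print(parser.parse(tokens, sql="SELECT 1")[0].sql())  # SELECT 1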
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
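Statements separated by semicolons come back as separate trees. A minimal sketch using the default dialect:

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT 1; SELECT 2"
trees = Parser().parse(Tokenizer().tokenize(sql), sql=sql)
print([tree.sql() for tree in trees])  # ['SELECT 1', 'SELECT 2']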
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
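For example, asking for an exp.Select directly (a sketch, assuming exp.Select is among the types registered in EXPRESSION_PARSERS). When every candidate type fails, the per-type errors are merged into the raised ParseError, each annotated with its into_expression:

from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM t"
select = Parser().parse_into(exp.Select, Tokenizer().tokenize(sql), sql=sql)[0]
assert isinstance(select, exp.Select)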
    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
Logs or raises any found errors, depending on the chosen error level setting.
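check_errors runs internally at the end of each parse, so the error level chosen at construction decides whether accumulated problems are logged or raised. A sketch with ErrorLevel.RAISE (the malformed input is arbitrary):

from sqlglot.errors import ErrorLevel, ParseError
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

parser = Parser(error_level=ErrorLevel.RAISE, max_errors=3)
try:
    parser.parse(Tokenizer().tokenize("SELECT 1 +"), sql="SELECT 1 +")
except ParseError as e:
    print(len(e.errors))  # merged list of recorded error dicts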
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
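The context captured here is what a caller sees on the raised ParseError: each entry in its errors list carries the message plus line, col, start_context, highlight and end_context. A sketch under the default ErrorLevel.IMMEDIATE:

from sqlglot.errors import ParseError
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

bad = "SELECT 1 +"
try:
    Parser().parse(Tokenizer().tokenize(bad), sql=bad)
except ParseError as e:
    err = e.errors[0]
    print(err["line"], err["col"], repr(err["highlight"]))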
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
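Since it funnels through _add_comments and validate_expression, this helper is how parser code builds AST nodes rather than calling the exp classes directly. Standalone it behaves like a validated constructor (a sketch):

from sqlglot import exp
from sqlglot.parser import Parser

parser = Parser()
node = parser.expression(exp.EQ, this=exp.column("a"), expression=exp.Literal.number(1))
print(node.sql())  # a = 1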
    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
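With any error level other than ErrorLevel.IGNORE, a node missing a mandatory argument is routed through raise_error. For instance, exp.Cast requires a target type (a sketch):

from sqlglot import exp
from sqlglot.errors import ParseError
from sqlglot.parser import Parser

try:
    Parser().validate_expression(exp.Cast(this=exp.column("x")))  # missing required "to"
except ParseError as e:
    print(e.errors[0]["description"])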